Example #1
    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug("Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
                self.inputs['args'][0], 
                self.inputs['instrument_mapfile'],
                self.inputs['sky_mapfile']
            )
        )
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)
        
        return True
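A minimal, self-contained sketch of the skip-synchronisation step above. It assumes DataMap and DataProduct live at their usual lofarpipe location; the map contents are made up for illustration.

from lofarpipe.support.data_map import DataMap, DataProduct

data_map = DataMap([])
inst_map = DataMap([])
sky_map = DataMap([])
data_map.data.append(DataProduct('node1', 'a.ms', False))
inst_map.data.append(DataProduct('node1', 'a.inst', True))   # flagged bad
sky_map.data.append(DataProduct('node1', 'a.sky', False))

# one skipped entry propagates its skip flag to all three maps
for x, y, z in zip(data_map, inst_map, sky_map):
    x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

print(data_map[0].skip)  # True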
Example #2
def plugin_main(args, **kwargs):
    """
    Matches the hosts in one datamap with those in another

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to adjust
    mapfile_to_match : str
        Filename of datamap to match

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_to_match = DataMap.load(mapfile_to_match)
    map_to_match.iterator = DataMap.SkipIterator

    hosts_to_match = []
    for item in map_to_match:
        hosts_to_match.append(item.host)

    for item, host in zip(map_in, hosts_to_match):
        item.host = host

    map_in.save(mapfile_in)
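A hedged usage sketch for the plugin above: write two one-entry mapfiles, then copy the host of the second onto the first. The lofarpipe import path and the host/file names are assumptions for illustration.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'in.mapfile')
ref_file = os.path.join(d, 'ref.mapfile')

map_a = DataMap([])
map_a.data.append(DataProduct('localhost', 'a.ms', False))
map_a.save(in_file)
map_b = DataMap([])
map_b.data.append(DataProduct('node042', 'b.ms', False))
map_b.save(ref_file)

plugin_main([], mapfile_in=in_file, mapfile_to_match=ref_file)
print(DataMap.load(in_file)[0].host)  # 'node042'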
Example #3
    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed 
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with the collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)
Example #4
    def _load_mapfiles(self):
        """
        Load data map file, instrument map file, and sky map file.
        Update the 'skip' fields in these map files: if 'skip' is True in any
        of the maps, then 'skip' must be set to True in all maps.
        """
        self.logger.debug(
            "Loading map files:"
            "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" %
            (self.inputs['args'][0], self.inputs['instrument_mapfile'],
             self.inputs['sky_mapfile']))
        self.data_map = DataMap.load(self.inputs['args'][0])
        self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
        self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

        if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
            self.logger.error("Validation of input data mapfiles failed")
            return False

        # Update the skip fields of the three maps. If 'skip' is True in any of
        # these maps, then 'skip' must be set to True in all maps.
        for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
            x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

        return True
Example #5
    def go(self):
        super(imager_create_dbs, self).go()

        # get assoc_theta, convert from empty string if needed
        assoc_theta = self.inputs["assoc_theta"]
        if assoc_theta == "":
            assoc_theta = None

        # Load mapfile data from files
        self.logger.info(self.inputs["slice_paths_mapfile"])
        slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
        input_map = DataMap.load(self.inputs['args'][0])
        source_list_map = DataMap.load(self.inputs['source_list_map_path'])

        if self._validate_input_data(input_map, slice_paths_map):
            return 1

        # Run the nodes with the collected inputs
        jobs, output_map = self._run_create_dbs_node(
                 input_map, slice_paths_map, assoc_theta,
                 source_list_map)

        # Collect the output of the node scripts and write it to (map) files
        return self._collect_and_assign_outputs(jobs, output_map,
                                    slice_paths_map)
Example #6
def plugin_main(args, **kwargs):
    """
    Takes in a list of targets and an h5parm solution set and returns a list
    of stations in the target data that are missing from the calibrator
    solutions' antenna table
    
    Parameters
    ----------
    mapfile_in : str
        Mapfile for input measurement sets
    h5parmdb: str
        Location of the solution h5parm set
    solset_name: str
        Name of the solution set of the corresponding h5parm set to compare with
    filter: str
        Default filter constraints for the ndppp_prep_target step (usually removing International Baselines)
    
    Returns
    -------
    result : dict
        Output station names to filter
    """
    mapfile_in = kwargs['mapfile_in']
    h5parmdb = kwargs['h5parmdb']
    solset_name = kwargs['solset_name']
    filter = kwargs['filter']
    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in xrange(len(data))]

    if len(mslist) == 0:
        raise ValueError(
            "Did not find any existing directory in input MS list!")
    MS = mslist[0]

    ## reading ANTENNA table of MS
    antennaFile = MS + "/ANTENNA"
    logging.info('Collecting information from the ANTENNA table.')
    antennaTable = pt.table(antennaFile, ack=False)
    antennaNames = antennaTable.getcol('NAME')

    ## reading ANTENNA information of h5parm
    data = h5parm(h5parmdb, readonly=True)
    solset = data.getSolset(solset_name)
    station_names = solset.getAnt().keys()

    ## check whether there are more stations in the target than in the calibrator solutions
    missing_stations = list(set(antennaNames) - set(station_names))
    for missing_station in missing_stations:
        filter += ';!' + missing_station + '*'

    ## return results
    result = {'filter': str(filter)}
    return result
Example #7
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
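A hedged usage sketch for the prune plugin above; the paths and the lofarpipe import location are assumptions. Note that matching is case-insensitive because both sides are lower-cased.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'in.mapfile')
map_in = DataMap([])
map_in.data.append(DataProduct('localhost', '/data/keep_this.ms', False))
map_in.data.append(DataProduct('localhost', '/data/DROP_this.ms', False))
map_in.save(in_file)

result = plugin_main([], mapfile_in=in_file, prune_str='/data/drop',
                     mapfile_dir=d, filename='pruned.mapfile')
print([item.file for item in DataMap.load(result['mapfile'])])
# ['/data/keep_this.ms']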
Example #8
def plugin_main(args, **kwargs):
    """
    Takes in list of targets and an h5parm solution set and returns a list of stations
    in the target data which mismatch the calibrator solutions antenna table
    
    Parameters
    ----------
    mapfile_in : str
        Mapfile for input measurement sets
    filter: str
        Default filter constrains for the ndppp_prep_target step (usually removing International Baselines)
    
    Returns
    -------
    result : dict
        Output station names to filter
    """
    mapfile_in = kwargs['mapfile_in']
    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in xrange(len(data))]
    msfile = mslist[0]

    observationTable = pyrap.tables.table(msfile + '::OBSERVATION')
    targetName = observationTable.getcol('LOFAR_TARGET')['array'][0]

    ## return results
    result = {'targetName': targetName}
    return result
Example #9
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            if append_index.lower() == 'true':
                append_index = True
            else:
                append_index = False
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(
                DataProduct(item.host,
                            item.file + append_str + '_{}'.format(i),
                            item.skip))
        else:
            map_out.data.append(
                DataProduct(item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
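A hedged usage sketch for the append plugin above (lofarpipe import path and file names assumed). Passing append_index as the string 'True' exercises the string-to-bool conversion.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'in.mapfile')
map_in = DataMap([])
map_in.data.append(DataProduct('localhost', '/data/scan.ms', False))
map_in.save(in_file)

result = plugin_main([], mapfile_in=in_file, append='.avg',
                     append_index='True', mapfile_dir=d,
                     filename='appended.mapfile')
print(DataMap.load(result['mapfile'])[0].file)  # '/data/scan.ms.avg_0'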
Example #10
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the 
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    ignore_dummies: str (optional)
        If true, do not count dummy entries when expanding

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    try:
        # if the user has given a dummy preference, follow it;
        # otherwise count dummies as usual
        ignore_dummies = str(kwargs['ignore_dummies'])
        ignore_dummies = ignore_dummies in ['true', 'True', '1', 'T', 't']
    except KeyError:
        ignore_dummies = False

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} '
            'differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])

    if ignore_dummies:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                if (groupmap[groupID].file)[fileID] != 'dummy_entry':
                    map_out.data.append(DataProduct(
                        inmap[groupID].host, inmap[groupID].file,
                        (inmap[groupID].skip or groupmap[groupID].skip)))
    else:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                map_out.data.append(DataProduct(
                    inmap[groupID].host, inmap[groupID].file,
                    (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #11
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'],kwargs['hdf5file'])
    if 'instrument' in kwargs:
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if 'compression' in kwargs:
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if 'solset' in kwargs:
        solsetName = kwargs['solset']
    else:
        solsetName = None


    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file,'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file,'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file,'sky')
    if not os.path.isdir(skydbFile):
        logging.critical('Missing sky table.')
        sys.exit(1)
        
    #generate list of parmDB-filenames
    parmDBnames = [ MS.file+instrument for MS in datamap ]

    #create and fill the hdf5-file:
    solset = parmDBs2h5parm(hdf5File, parmDBnames, antennaFile, fieldFile, skydbFile, compression=compression, solsetName=solsetName)

    # Add CREATE entry to history 
    h5parmDB = h5parm(hdf5File, readonly = False)
    soltabs = h5parmDB.getSoltabs(solset=solset)
    for st in soltabs:
        sw = solWriter(soltabs[st])
        sw.addHistory('CREATE (by PipelineStep_losotoImporter from %s / %s - %s)' % (os.path.abspath(''), 
                                   os.path.basename(parmDBnames[0]), os.path.basename(parmDBnames[-1]) ) )
    h5parmDB.close()

    #generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False            
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    outmap = open(outfileid, 'w')
    outmap.write(repr([mapfileentry]))
    outmap.close()
    result = {}
    result['mapfile'] = outfileid
    return result
Example #12
def plugin_main(args, **kwargs):
    """
    Checks a "check" mapfile for values of 'None' and, if found, changes the
    input mapfile "file" to "empty".

    Note: the check and input mapfiles must have the same length

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_check : str
        Name of the mapfile to check for None
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    checkmap = DataMap.load(kwargs['mapfile_check'])

    if len(inmap) != len(checkmap):
        raise ValueError('Input and check mapfiles must have the same length')

    map_out = DataMap([])
    for checkitem, item in zip(checkmap, inmap):
        if checkitem.file.lower() == 'none':
            map_out.data.append(DataProduct(item.host, 'empty', item.skip))
        else:
            map_out.append(item)

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
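A hedged usage sketch for the None-check plugin above (lofarpipe import path and file names assumed): the entry whose check value is 'None' is replaced by 'empty', the other passes through unchanged.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'in.mapfile')
check_file = os.path.join(d, 'check.mapfile')
map_in = DataMap([])
map_in.data.append(DataProduct('localhost', 'a.ms', False))
map_in.data.append(DataProduct('localhost', 'b.ms', False))
map_in.save(in_file)
map_check = DataMap([])
map_check.data.append(DataProduct('localhost', 'None', False))
map_check.data.append(DataProduct('localhost', 'ok', False))
map_check.save(check_file)

result = plugin_main([], mapfile_in=in_file, mapfile_check=check_file,
                     mapfile_dir=d, filename='checked.mapfile')
print([item.file for item in DataMap.load(result['mapfile'])])
# ['empty', 'b.ms']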
Example #13
    def finalize(self):
        """
        Finalize this operation
        """
        # Add output datamaps to direction object for later reference
        self.direction.input_files_single_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'input_files_single.mapfile')
        self.direction.verify_subtract_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')
        self.direction.dir_dep_parmdb_mapfile = os.path.join(
            self.pipeline_mapfile_dir,
            'merge_normalized_selfcal_parmdbs.mapfile')
        self.direction.converted_parmdb_mapfile = os.path.join(
            self.pipeline_mapfile_dir,
            'convert_normalized_merged_selfcal_parmdbs.mapfile')
        self.direction.dir_indep_skymodels_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'full_skymodels.mapfile')
        self.direction.selfcal_plots_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'make_selfcal_plots.mapfile')
        if self.direction.create_preapply_h5parm:
            self.direction.preapply_parmdb_mapfile = os.path.join(
                self.pipeline_mapfile_dir, 'create_preapply_h5parm.mapfile')
        self.direction.sourcedb_new_facet_sources = os.path.join(
            self.pipeline_mapfile_dir,
            'make_sourcedb_new_facet_sources_for_facet_imaging.mapfile')
        self.direction.diff_models_field_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'predict_and_difference_models.mapfile')

        # Store results of verify_subtract check. This will work if the verification
        # was done using multiple bands although we use only one at the moment
        if (os.path.exists(self.direction.verify_subtract_mapfile) and
                not self.parset['calibration_specific']['skip_selfcal_check']):
            ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
            ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
            if all(ok_flags):
                self.direction.selfcal_ok = True
            else:
                self.direction.selfcal_ok = False
        elif self.parset['calibration_specific']['skip_selfcal_check']:
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False

        # Delete temp data
        self.direction.cleanup_mapfiles = [
            os.path.join(self.pipeline_mapfile_dir, 'shift_cal.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'apply_dir_dep.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'sorted_groups.mapfile_groups')
        ]
        self.log.debug('Cleaning up files (direction: {})'.format(
            self.direction.name))
        self.direction.cleanup()
        self.cleanup()
Example #14
def plugin_main(args, **kwargs):
    """
    Selects those files from mapfile_in that have the same filename base as
    the ones in mapfile_reference.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_reference : str
        Name of the reference mapfile
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    refmap = DataMap.load(kwargs['mapfile_reference'])

    map_out = DataMap([])

    basenames = [
        os.path.splitext(os.path.basename(item.file))[0] for item in inmap
    ]
    for refitem in refmap:
        refbase = os.path.splitext(os.path.basename(refitem.file))[0]
        idx = basenames.index(refbase)
        map_out.append(inmap[idx])

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #15
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            if append_index.lower() == 'true':
                append_index = True
            else:
                append_index = False
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(DataProduct(item.host, item.file+append_str+'_{}'.format(i), item.skip))
        else:
            map_out.data.append(DataProduct(item.host, item.file+append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #16
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for list of files

    Parameters
    ----------
    files : list or str
        List of files or mapfile with such a list as the only entry. May be
        given as a list of strings or as a string (e.g.,
        '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is mapfile containing list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except Exception:
            # not a mapfile; parse the string as a list directly
            files = kwargs['files'].strip('[]').split(',')
        files = [f.strip() for f in files]
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    for i in range(len(files)-len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
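A hedged usage sketch for the plugin above, passing files and hosts in the bracketed-string form the docstring describes (the names are made up). With three files and two hosts, the hosts are reused cyclically.

import tempfile

result = plugin_main([], files='[s1.skymodel, s2.skymodel, s3.skymodel]',
                     hosts='[host1, host2]',
                     mapfile_dir=tempfile.mkdtemp(),
                     filename='files.mapfile')
# the resulting map assigns s1 -> host1, s2 -> host2, s3 -> host1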
Example #17
def plugin_main(args, **kwargs):

    mapfile_in = kwargs['mapfile_in']
    station_filter = kwargs['station_filter']
    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in range(len(data))]

    ## derive the fraction of flagged data of the entire observation
    print('Reading data.')
    logging.info('Reading data.')
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    flagged_fraction_dict = pool.map(find_flagged_fraction, mslist)

    print('Apply station filter ' + str(station_filter))
    logging.info('Apply station filter ' + str(station_filter))
    flagged_fraction_data = {}
    for entry in flagged_fraction_dict:
        antennas = entry.keys()
        selected_stations = [
            station_name for station_name in antennas
            if re.match(station_filter, station_name)
        ]
        if len(selected_stations) == 0:
            logging.error('No stations left after filtering.')
            return (1)
        for antenna in selected_stations:
            try:
                flagged_fraction_data[antenna].append(float(entry[antenna]))
            except KeyError:
                flagged_fraction_data[antenna] = [float(entry[antenna])]

    flagged_fraction_list = []
    sorted_stations = sorted(flagged_fraction_data.keys())
    for antenna in sorted_stations:
        flagged_fraction = sum(flagged_fraction_data[antenna]) / len(
            flagged_fraction_data[antenna])
        flagged_fraction_list.append(flagged_fraction)
        try:
            flagged_fraction_data[flagged_fraction].append(antenna)
        except KeyError:
            flagged_fraction_data[flagged_fraction] = [antenna]

    min_flagged_fraction = min(flagged_fraction_list)
    refant = flagged_fraction_data[min_flagged_fraction][0]
    logging.info('Selected station ' + str(refant) +
                 ' as reference antenna. Fraction of flagged data is ' +
                 '{:>3}'.format('{:.1f}'.format(min_flagged_fraction) + '%'))
    print('Selected station ' + str(refant) +
          ' as reference antenna. Fraction of flagged data is ' +
          '{:>3}'.format('{:.1f}'.format(min_flagged_fraction) + '%'))

    ## return results
    result = {'refant': str(refant)}
    return (result)
Example #18
    def go(self):
        # TODO: Remove dependency on mapfile_dir 
        self.logger.info("Starting copier run")
        super(copier, self).go()

        # Load data from mapfiles
        self.source_map = DataMap.load(self.inputs['mapfile_source'])
        self.target_map = DataMap.load(self.inputs['mapfile_target'])

        # validate data in mapfiles
        if not self._validate_mapfiles(self.inputs['allow_rename']):
            return 1

        # Run the compute nodes with the node specific mapfiles
        for source, target in zip(self.source_map, self.target_map):
            args = [source.host, source.file, target.file]
            self.append_job(target.host, args)

        # start the jobs, return the exit status.
        return self.run_jobs()
Example #19
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for item in mapsin:
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
Example #20
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format (e.g.,
        '[file1, file2, ...]'). If False, it will be a space-separated list
        (e.g., 'file1 file2 ...').

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        if list_format.lower() == 'true':
            list_format = True
        else:
            list_format = False

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = ' '.join(file_list)

    # Just assign host of first file to compressed file
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
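A hedged usage sketch for the compress plugin above (lofarpipe import path and names assumed): two entries collapse into one bracketed list entry.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'in.mapfile')
map_in = DataMap([])
map_in.data.append(DataProduct('node1', 'a.ms', False))
map_in.data.append(DataProduct('node1', 'b.ms', False))
map_in.save(in_file)

result = plugin_main([], mapfile_in=in_file, mapfile_dir=d,
                     filename='compressed.mapfile')
print(DataMap.load(result['mapfile'])[0].file)  # '[a.ms,b.ms]'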
Example #21
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for list of files

    Parameters
    ----------
    files : list or str
        List of files or mapfile with such a list as the only entry. May be
        given as a list of strings or as a string (e.g.,
        '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string (e.g.,
        '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is mapfile containing list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except Exception:
            # not a mapfile; parse the string as a list directly
            files = kwargs['files'].strip('[]').split(',')
        files = [f.strip() for f in files]
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #22
def update_state(dir_input):
    """
    Updates the paths in mapfiles or state files

    Parameters
    ----------
    dir_input : str
        Directory containing files to update

    """
    file_list = glob.glob(os.path.join(dir_input, '*'))

    if dir_input.endswith('mapfiles'):
        # Assume path is a pipeline mapfiles directory. In this case, we can
        # simply substitute the new working_dir for the old one in each of the
        # mapfiles
        working_dir = dir_input.split('results/')[0]
        for f in file_list:
            map = DataMap.load(f)
            for item in map:
                if '/' in item.file:
                    old_working_dir = item.file.split('results/')[0]
                    item.file = item.file.replace(old_working_dir, working_dir)
            map.save(f)
    elif dir_input.endswith('state'):
        # Assume path is the Factor state directory. In this case, we can try to
        # load files as pickled state files and look for paths inside. If found,
        # substitute new working_dir for the old one
        working_dir = os.path.dirname(dir_input)
        for f in file_list:
            try:
                with open(f, "rb") as fp:
                    d = pickle.load(fp)
                    for k, v in d.iteritems():
                        if type(v) is str:
                            if k == 'working_dir':
                                d[k] = working_dir
                            if '/' in v:
                                for infix in ['results/', 'state/', 'chunks/']:
                                    parts = v.split(infix)
                                    if len(parts) > 1:
                                        d[k] = os.path.join(working_dir, infix, parts[-1])
                        elif type(v) is list:
                            for i, l in enumerate(v):
                                if '/' in l:
                                    for infix in ['results/', 'state/', 'chunks/']:
                                        parts = l.split(infix)
                                        if len(parts) > 1:
                                            v[i] = os.path.join(working_dir, infix, parts[-1])
                            d[k] = v
                with open(f, "w") as fp:
                    pickle.dump(d, fp)
            except:
                pass
Example #23
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator
    newlist = ''
    for item in mapsin:
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))
    return maps
Example #24
def plugin_main(args, **kwargs):
    """
    Selects those files from mapfile_in that have the same filename base as
    the ones in mapfile_reference.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_reference : str
        Name of the reference mapfile
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    refmap = DataMap.load(kwargs['mapfile_reference'])

    map_out = DataMap([])

    basenames = [os.path.splitext(os.path.basename(item.file))[0]
                 for item in inmap]
    for refitem in refmap:
        refbase = os.path.splitext(os.path.basename(refitem.file))[0]
        idx = basenames.index(refbase)
        map_out.append(inmap[idx])

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #25
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to the
    end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42". This is
        a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim,
                item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #26
def plugin_main(args, **kwargs):
    """
    Takes the single entry of mapfile_in and repeats it once per entry of
    mapfile_comp in a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        output filename
    mapfile_comp : str
        target MSs

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # the single entry to be expanded
    n = len(DataMap.load(mapfile_comp))  # one output copy per MS file

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #27
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by expanding single input mapfile item into many items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing single item
    mapfile_to_match : str
        Filename of datamap containing multiple items
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_match = DataMap.load(mapfile_to_match)
    map_out = DataMap([])

    map_match.iterator = DataMap.SkipIterator
    for item in map_match:
        map_out.data.append(DataProduct(item.host, map_in[0].file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
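A hedged usage sketch for the expand plugin above (lofarpipe import path and names assumed): the single solution entry is repeated once per entry of the map it must match.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
single = os.path.join(d, 'single.mapfile')
match = os.path.join(d, 'match.mapfile')
map_single = DataMap([])
map_single.data.append(DataProduct('localhost', 'sols.h5', False))
map_single.save(single)
map_match = DataMap([])
map_match.data.append(DataProduct('node1', 'a.ms', False))
map_match.data.append(DataProduct('node2', 'b.ms', False))
map_match.save(match)

result = plugin_main([], mapfile_in=single, mapfile_to_match=match,
                     mapfile_dir=d, filename='expanded.mapfile')
print(len(DataMap.load(result['mapfile'])))  # 2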
Example #28
def plugin_main(args, **kwargs):
    """
    Takes the single entry of mapfile_in and repeats it once per entry of
    mapfile_comp in a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        output filename
    mapfile_comp : str
        target MSs

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # the single entry to be expanded
    n = len(DataMap.load(mapfile_comp))  # one output copy per MS file

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #29
    def _bbs(self,
             timeslice_map_path,
             parmdbs_map_path,
             sourcedb_map_path,
             skip=False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(
            parset, "bbs", "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(
            None, "bbs_output", "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(
            None, "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: This allows validation of maps in combination
        # get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                              "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset=parset_path,
                      instrument_mapfile=parmdbs_map_path,
                      sourcedb_mapfile=sourcedb_map_path,
                      mapfile=output_mapfile,
                      working_directory=self.scratch_directory)

        return output_mapfile
Example #30
def _combine_local_map(inmap):
    map_out = DataMap([])
    map_in = DataMap.load(inmap)
    map_in.iterator = DataMap.SkipIterator
    local_files = {}
    for item in map_in:
        if item.host in local_files:
            local_files[item.host] += item.file + ','
        else:
            local_files[item.host] = item.file + ','
    for k, v in local_files.iteritems():
        v = v.rstrip(',')
        v = '[' + v + ']'
        map_out.data.append(DataProduct(k, v, False))
    return map_out
Example #31
def _combine_local_map(inmap):
    map_out = DataMap([])
    map_in = DataMap.load(inmap)
    map_in.iterator = DataMap.SkipIterator
    local_files = {}
    for item in map_in:
        if item.host in local_files:
            local_files[item.host] += item.file + ','
        else:
            local_files[item.host] = item.file + ','
    for k, v in local_files.iteritems():
        v = v.rstrip(',')
        v = '[' + v + ']'
        map_out.data.append(DataProduct(k, v, False))
    return map_out
Example #32
def verify_subtract(direction):
    """
    Checks selfcal success
    """
    verify_subtract_mapfile = os.path.join(direction.working_dir, 'results', 'facetselfcal',
        direction.name, 'mapfiles', 'verify_subtract.break.mapfile')
    if os.path.exists(verify_subtract_mapfile):
        ok_mapfile = DataMap.load(verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        return all(ok_flags)
    else:
        return False
Example #33
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the 
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} differ'.
            format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    for groupID in xrange(len(groupmap)):
        for fileID in xrange(len(groupmap[groupID].file)):
            map_out.data.append(
                DataProduct(inmap[groupID].host, inmap[groupID].file,
                            (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #34
    def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
              skip = False):
        """
        Perform a calibration step. First with a set of sources from the
        gsm and in later iterations also on the found sources
        """
        # create parset for bbs run
        parset = self.parset.makeSubset("BBS.")
        parset_path = self._write_parset_to_file(parset, "bbs",
                        "Parset for calibration with a local sky model")

        # create the output file path
        output_mapfile = self._write_datamap_to_file(None, "bbs_output",
                        "Mapfile with calibrated measurement sets.")

        converted_sourcedb_map_path = self._write_datamap_to_file(None,
                "source_db", "correctly shaped mapfile for input sourcedbs")

        if skip:
            return output_mapfile

        # The create db step produces a mapfile with a single sourcelist for
        # the different timeslices. Generate a mapfile with copies of the
        # sourcelist location: This allows validation of maps in combination
        # get the original map data
        sourcedb_map = DataMap.load(sourcedb_map_path)
        parmdbs_map = MultiDataMap.load(parmdbs_map_path)
        converted_sourcedb_map = []

        # sanity check for correct output from previous recipes
        if not validate_data_maps(sourcedb_map, parmdbs_map):
            self.logger.error("The input files for bbs do not contain "
                                "matching host names for each entry content:")
            self.logger.error(repr(sourcedb_map))
            self.logger.error(repr(parmdbs_map))
            raise PipelineException("Invalid input data for imager_bbs recipe")

        self.run_task("imager_bbs",
                      timeslice_map_path,
                      parset = parset_path,
                      instrument_mapfile = parmdbs_map_path,
                      sourcedb_mapfile = sourcedb_map_path,
                      mapfile = output_mapfile,
                      working_directory = self.scratch_directory)

        return output_mapfile
Example #35
def verify_subtract(direction):
    """
    Checks selfcal success
    """
    verify_subtract_mapfile = os.path.join(direction.working_dir, 'results',
                                           'facetselfcal', direction.name,
                                           'mapfiles',
                                           'verify_subtract.break.mapfile')
    if os.path.exists(verify_subtract_mapfile):
        ok_mapfile = DataMap.load(verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        return all(ok_flags)
    else:
        return False
Example #36
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing input mapfile list item into separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    hosts : str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #37
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing input mapfile list item into separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    hosts : str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]
    for i in range(len(files)-len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
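A hedged usage sketch for the uncompress plugin above (lofarpipe import path and names assumed): the single bracketed list entry is split into one item per file, with hosts reused cyclically.

import os
import tempfile

from lofarpipe.support.data_map import DataMap, DataProduct

d = tempfile.mkdtemp()
in_file = os.path.join(d, 'compressed.mapfile')
map_in = DataMap([])
map_in.data.append(DataProduct('localhost', '[a.ms, b.ms, c.ms]', False))
map_in.save(in_file)

result = plugin_main([], mapfile_in=in_file, mapfile_dir=d,
                     filename='uncompressed.mapfile', hosts='[node1, node2]')
print([(item.host, item.file) for item in DataMap.load(result['mapfile'])])
# [('node1', 'a.ms'), ('node2', 'b.ms'), ('node1', 'c.ms')]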
Example #38
    def finalize(self):
        """
        Finalize this operation
        """
        # Add output datamaps to direction object for later reference
        self.direction.input_files_single_mapfile = os.path.join(self.pipeline_mapfile_dir,
            'input_files_single.mapfile')
        self.direction.verify_subtract_mapfile = os.path.join(self.pipeline_mapfile_dir,
            'verify_subtract.break.mapfile')
        self.direction.dir_dep_parmdb_mapfile = os.path.join(self.pipeline_mapfile_dir,
            'merge_normalized_selfcal_parmdbs.mapfile')
        self.direction.dir_indep_skymodels_mapfile = os.path.join(self.pipeline_mapfile_dir,
            'full_skymodels.mapfile')
        self.direction.selfcal_plots_mapfile = os.path.join(self.pipeline_mapfile_dir,
            'make_selfcal_plots.mapfile')

        # Store results of verify_subtract check. This will work if the verification
        # was done using multiple bands although we use only one at the moment
        if (os.path.exists(self.direction.verify_subtract_mapfile) and not
            self.parset['calibration_specific']['skip_selfcal_check']):
            ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
            ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
            if all(ok_flags):
                self.direction.selfcal_ok = True
            else:
                self.direction.selfcal_ok = False
        elif self.parset['calibration_specific']['skip_selfcal_check']:
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False

        # Delete temp data
        self.direction.cleanup_mapfiles = [
            os.path.join(self.pipeline_mapfile_dir, 'add_all_facet_sources.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'shift_and_average.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_blavg_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'predict_outlier_model.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'corrupt_outlier_model.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'sorted_groups.mapfile_groups')]
        self.log.debug('Cleaning up files (direction: {})'.format(self.direction.name))
        self.direction.cleanup()
Example #39
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels for the
        different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of groups
        needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} '
            'differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    for groupID in xrange(len(groupmap)):
        for fileID in xrange(len(groupmap[groupID].file)):
            map_out.data.append(DataProduct(
                inmap[groupID].host, inmap[groupID].file,
                (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
Example #40
    def finalize(self):
        """
        Finalize this operation
        """
        # Add output datamaps to direction object for later reference
        self.direction.input_files_single_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'input_files_single.mapfile')
        self.direction.verify_subtract_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')

        # Store results of verify_subtract check. This will work if the verification
        # was done using multiple bands although we use only one at the moment
        if (os.path.exists(self.direction.verify_subtract_mapfile)
                and not self.parset['skip_selfcal_check']):
            ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
            ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
            if all(ok_flags):
                self.direction.selfcal_ok = True
            else:
                self.direction.selfcal_ok = False
        elif self.parset['skip_selfcal_check']:
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False

        # Delete temp data
        self.direction.cleanup_mapfiles = [
            os.path.join(self.pipeline_mapfile_dir,
                         'add_all_facet_sources.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'shift_and_average.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'concat_blavg_data.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'predict_outlier_model.mapfile'),
            os.path.join(self.pipeline_mapfile_dir,
                         'corrupt_outlier_model.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
            os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile')
        ]
        self.log.debug('Cleaning up files (direction: {})'.format(
            self.direction.name))
        self.direction.cleanup()
Example #41
def plugin_main(args, **kwargs):
    print 'PLUGIN KWARG: ', kwargs
    result = {}
    datamap = None
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    if kwargs['join_files']:
        for item in datamap:
            item.file = os.path.join(item.file,kwargs['join_files'])
    if kwargs['newname']:
        fileid = os.path.join(os.path.dirname(fileid), kwargs['newname'])
    if datamap:
        print 'Writing mapfile: ', fileid
        datamap.save(fileid)
        result['mapfile'] = fileid
    return result
Example #42
def plugin_main(args, **kwargs):
    """
    Makes a mapfile that repeats the maximum x/y size found in the input
    mapfile items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    # Find max size in x and y
    xsize_list = []
    ysize_list = []
    for item in map_in:
        xsize, ysize = [int(s) for s in item.file.split(' ')]
        xsize_list.append(xsize)
        ysize_list.append(ysize)
    maxsize = '{0} {1}'.format(max(xsize_list), max(ysize_list))

    for item in map_in:
        map_out.data.append(DataProduct(item.host, maxsize, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
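
# A minimal usage sketch for the plugin above, assuming it is in scope. The
# input items' 'file' fields hold 'xsize ysize' strings; the output repeats
# the maximum of each. Paths and hostnames are illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

map_in = DataMap([])
map_in.data.append(DataProduct('node01', '4096 2048', False))
map_in.data.append(DataProduct('node02', '2048 8192', False))
map_in.save('/tmp/imsize.mapfile')

result = plugin_main(None, mapfile_in='/tmp/imsize.mapfile',
                     mapfile_dir='/tmp', filename='imsize_max.mapfile')
for item in DataMap.load(result['mapfile']):
    print(item.file)  # '4096 8192' for every item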
Example #43
def plugin_main(args, **kwargs):
    """
    Joins a path component onto each filename in a mapfile and/or saves the
    mapfile under a new name

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to adjust
    join_files : str
        Path component to join onto each item's file (skipped if empty)
    newname : str
        New basename for the output mapfile (skipped if empty)

    Returns
    -------
    result : dict
        Adjusted datamap filename

    """
    print('PLUGIN KWARG: ', kwargs)
    result = {}
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    #if kwargs['change_files']:
    #    for item in datamap:
    #        item.file = kwargs['change_files']
    if kwargs['join_files']:
        for item in datamap:
            item.file = os.path.join(item.file, kwargs['join_files'])
    if kwargs['newname']:
        fileid = os.path.join(os.path.dirname(fileid), kwargs['newname'])
    if datamap:
        print('Writing mapfile: ', fileid)
        datamap.save(fileid)
        result['mapfile'] = fileid
    return result
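
# A minimal usage sketch for the plugin above, assuming it is in scope. It
# joins a subband name onto each directory and renames the mapfile. All
# paths below are illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

dm = DataMap([])
dm.data.append(DataProduct('node01', '/data/obs1', False))
dm.save('/tmp/dirs.mapfile')

result = plugin_main(None, mapfile_in='/tmp/dirs.mapfile',
                     join_files='SB000.MS', newname='ms_files.mapfile')
for item in DataMap.load(result['mapfile']):
    print(item.file)  # /data/obs1/SB000.MS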
def plugin_main(args, **kwargs):
    """
    Takes in mapfiles and changes the host names to allow for efficient MPI reduction

    Parameters
    ----------
    mapfiles : list of strs
        List of the names of the input mapfiles. WILL BE MODIFIED!
    mapfile_dir : str
        Name of the directory containing the mapfile
    head_node_only : str
        Either 'True' or 'False'; whether to use just the head node

    Returns
    -------
    result : empty dictionary

    """

    result = {}
    # Read in the list of mapfiles from a comma-separated string
    mapfiles = (kwargs['mapfiles'][1:-1]).split(',')
    mapfile_dir = kwargs['mapfile_dir']
    head_node_only = (kwargs['head_node_only'] in ['True', 'true', 'T', 't', '1'])
    fn_list = []
    for mf in mapfiles:
        fn_list.append(os.path.join(mapfile_dir, mf))

    # Caution: remember to reload the compute-node iterable for every mapfile
    # to ensure that corresponding entries have the same node set as host

    for fn in fn_list:
        if head_node_only:
            # Read in the head node and set up an iterator (unnecessary with
            # just one node, but better to have less code!)
            cn_cycle = it.cycle(get_head_node(
                ClusterDesc(str(os.environ['cluster_desc_file']))))
        else:
            # Read in the list of compute nodes and set up an iterator to
            # cycle over them
            cn_cycle = it.cycle(get_compute_nodes(
                ClusterDesc(str(os.environ['cluster_desc_file']))))

        # Read in the current data map file (probably with all host values
        # set to "localhost")
        data = DataMap.load(fn)
        iterator = DataMap.SkipIterator(data)

        # Iterate through the map file, assigning each entry a host from the
        # available compute nodes in a cyclical fashion
        for value in iterator:
            value.host = next(cn_cycle)
        data.save(fn)   # overwrite the original file
    return result
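
# A standalone sketch of the same cyclic host assignment, with a hard-coded
# node list standing in for get_compute_nodes() so that no cluster
# description file is needed. Hostnames and paths are illustrative only.
import itertools as it

from lofarpipe.support.data_map import DataMap, DataProduct

nodes = ['node01', 'node02', 'node03']  # stand-in for get_compute_nodes()
cn_cycle = it.cycle(nodes)

dm = DataMap([])
for i in range(5):
    dm.data.append(DataProduct('localhost', '/data/chunk{0}.MS'.format(i), False))

for value in DataMap.SkipIterator(dm):
    value.host = next(cn_cycle)

for item in dm:
    print(item.host, item.file)  # node01, node02, node03, node01, node02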
Example #45
def plugin_main(args, **kwargs):
    """
    Updates the hosts in an input datamap

    Parameters
    ----------
    mapfile_in : str, optional
        Filename of datamap
    mapfile_dir: str, optional
        Directory containing mapfiles. All mapfiles in this directory will be
        updated
    hosts : str or list
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    """
    if 'mapfile_dir' in kwargs:
        mapfiles_in = glob.glob(os.path.join(kwargs['mapfile_dir'], '*'))
    else:
        mapfiles_in = [kwargs['mapfile_in']]

    if len(mapfiles_in) == 0:
        return

    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    else:
        hosts = kwargs['hosts']

    for mapfile_in in mapfiles_in:
        try:
            data_map = DataMap.load(mapfile_in)
            # Wrap the host list around if there are more items than hosts
            for i in range(len(data_map) - len(hosts)):
                hosts.append(hosts[i])

            for item, host in zip(data_map, hosts):
                item.host = host

            data_map.save(mapfile_in)
        except:
            print('File {} does not appear to be a mapfile. Skipping it.'.format(
                mapfile_in))
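
# A minimal usage sketch for the plugin above, assuming it is in scope. With
# four entries and two hosts, the host list is wrapped around. Paths and
# hostnames are illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

dm = DataMap([])
for i in range(4):
    dm.data.append(DataProduct('localhost', '/data/band{0}.MS'.format(i), False))
dm.save('/tmp/bands.mapfile')

plugin_main(None, mapfile_in='/tmp/bands.mapfile', hosts='[node01, node02]')
for item in DataMap.load('/tmp/bands.mapfile'):
    print(item.host)  # node01, node02, node01, node02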
Example #46
def input2strlist(invar):
    str_list = None
    if type(invar) is str:
        if invar.startswith('[') and invar.endswith(']'):
            str_list = [f.strip(' \'\"') for f in invar.strip('[]').split(',')]
        else:
            map_in = DataMap.load(invar)
            map_in.iterator = DataMap.SkipIterator
            str_list = []
            for item in map_in:
                # DataMap iteration yields DataProduct items, so take the
                # 'file' attribute; it may hold a string form of a Python list
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        str_list.append(f.strip(' \'\"'))
                else:
                    str_list.append(fname.strip(' \'\"'))
    elif type(invar) is list:
        str_list = [str(f).strip(' \'\"') for f in invar]
    else:
        raise TypeError('input2strlist: Type '+str(type(invar))+' unknown!')
    return str_list
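
# A minimal sketch of the three accepted input forms, assuming input2strlist
# is in scope. The mapfile path is illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

print(input2strlist("['a.MS', 'b.MS']"))   # ['a.MS', 'b.MS']
print(input2strlist(['a.MS', 'b.MS']))     # ['a.MS', 'b.MS']

dm = DataMap([])
dm.data.append(DataProduct('localhost', '/data/a.MS', False))
dm.save('/tmp/in.mapfile')
print(input2strlist('/tmp/in.mapfile'))    # ['/data/a.MS']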
Example #47
    def cleanup(self):
        """
        Cleans up unneeded data
        """
        from lofarpipe.support.data_map import DataMap

        for mapfile in self.cleanup_mapfiles:
            try:
                datamap = DataMap.load(mapfile)
                for item in datamap:
                    # Handle case in which item.file is a Python list
                    if item.file[0] == '[' and item.file[-1] == ']':
                        files = item.file.strip('[]').split(',')
                    else:
                        files = [item.file]
                    for f in files:
                        if os.path.exists(f):
                            os.system('rm -rf {0}'.format(f))
            except IOError:
                pass
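
# A small sketch of how a list-valued 'file' field is unpacked before the
# files are removed; the value below is illustrative only.
item_file = '[/data/a.MS,/data/b.MS]'
if item_file[0] == '[' and item_file[-1] == ']':
    files = item_file.strip('[]').split(',')
else:
    files = [item_file]
print(files)  # ['/data/a.MS', '/data/b.MS']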
def plugin_main(args, **kwargs):
    """
    Makes a mapfile containing a single item selected from the input mapfile
    (by default, the middle one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index : int, optional
        Index of the item to select (defaults to the middle item)

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        index = len(files) // 2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
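
# A minimal usage sketch for the plugin above, assuming it is in scope: of
# five items, the middle one (index 2) is selected. Paths and hostnames are
# illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

dm = DataMap([])
for i in range(5):
    dm.data.append(DataProduct('node01', '/data/chunk{0}.parmdb'.format(i), False))
dm.save('/tmp/parmdbs.mapfile')

result = plugin_main(None, mapfile_in='/tmp/parmdbs.mapfile',
                     mapfile_dir='/tmp', filename='middle.mapfile')
for item in DataMap.load(result['mapfile']):
    print(item.file)  # /data/chunk2.parmdb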
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append ('None' appends nothing)
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for item in map_in:
        map_out.data.append(DataProduct(item.host, item.file + append_str,
                                        item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
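
# A minimal usage sketch for the plugin above, assuming it is in scope.
# Paths are illustrative only.
from lofarpipe.support.data_map import DataMap, DataProduct

dm = DataMap([])
dm.data.append(DataProduct('node01', '/data/field', False))
dm.save('/tmp/field.mapfile')

result = plugin_main(None, mapfile_in='/tmp/field.mapfile', append='_avg',
                     mapfile_dir='/tmp', filename='field_avg.mapfile')
for item in DataMap.load(result['mapfile']):
    print(item.file)  # /data/field_avg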
Example #51
def check_existing_files(mapfile):
    """
    Checks if files in input mapfile exist

    Parameters
    ----------
    mapfile : str
        Filename of mapfile to check

    Returns
    -------
    all_files : list
        List of all files in the mapfile (empty if the mapfile could not be
        read)

    """

    all_exist = True
    all_files = []
    log.info('Checking for existing files...')
    try:
        datamap = DataMap.load(mapfile)
        for item in datamap:
            # Handle case in which item.file is a Python list
            if item.file[0] == '[' and item.file[-1] == ']':
                files = item.file.strip('[]').split(',')
            else:
                files = [item.file]
            for f in files:
                if not os.path.exists(f):
                    all_exist = False
            all_files.extend(files)
        if all_exist:
            log.info('...all files exist')
        else:
            log.warning('...one or more files not found')
        return all_files
    except IOError:
        return []
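
# A minimal usage sketch, assuming check_existing_files is in scope and that
# the module-level 'log' it uses is a standard logging logger (set up below
# as an assumption). The path is illustrative only.
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('factor')  # assumed name for the module-level logger

from lofarpipe.support.data_map import DataMap, DataProduct

dm = DataMap([])
dm.data.append(DataProduct('localhost', '/tmp/does_not_exist.MS', False))
dm.save('/tmp/check.mapfile')

print(check_existing_files('/tmp/check.mapfile'))
# ['/tmp/does_not_exist.MS'], after a warning that not all files were found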
Example #52
    def check_existing_files(self, mapfile):
        """
        Checks if files in input mapfile exist

        Parameters
        ----------
        mapfile : str
            Filename of mapfile to check

        Returns
        -------
        all_exist : bool
            True if all files in mapfile exist, False if not

        """
        from lofarpipe.support.data_map import DataMap

        all_exist = True
        self.log.debug('Checking for existing files...')
        try:
            datamap = DataMap.load(mapfile)
            for item in datamap:
                # Handle case in which item.file is a Python list
                if item.file[0] == '[' and item.file[-1] == ']':
                    files = item.file.strip('[]').split(',')
                else:
                    files = [item.file]
                for f in files:
                    if not os.path.exists(f):
                        all_exist = False
            if all_exist:
                self.log.debug('...all files exist')
            else:
                self.log.debug('...one or more files not found')
            return all_exist
        except IOError:
            self.log.debug('Could not read mapfile {}. Skipping it'.format(mapfile))
            return False