Exemplo n.º 1
0
def gen_hybrid_insert(dbconfig, datadict, table_name):
    """
    Return appropriate data structures for inserting the given data dictionary
    into a database via a database access library. Currently using Psycopg2,
    so return a tuple of an INSERT template string and a sequence of values.

    :param dbconfig: dictionary of DB configuration parameters (uses 'db_schema_name').
    :param datadict: dictionary of field values; also serialized whole as JSON metadata.
    :param table_name: name of the hybrid table to insert into.
    :return: tuple of (INSERT template string, sequence of values).
    :raises ProcessingError: if the data dictionary is empty OR it lacks a value
        for any field required by the hybrid table.
    """
    if (not datadict):  # sanity check
        errMsg = "(gen_hybrid_insert): Empty data dictionary cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    schema_clean = clean_id(dbconfig.get('db_schema_name'))
    table_clean = clean_id(table_name)

    required = SQL_FIELDS_HYBRID.copy()
    fieldnames = [clean_id(field) for field in required]
    num_keys = len(fieldnames)  # count of required keys (w/o metadata)
    fieldnames.append('metadata')  # add name of the JSON metadata field
    keys = ', '.join(fieldnames)  # made from cleaned fieldnames

    # collect a value for each required key, looking each key up only once
    candidates = (datadict.get(key) for key in required)
    values = [val for val in candidates if val is not None]

    num_vals = len(values)  # count of values found (no metadata yet)
    if (num_keys == num_vals):  # must have a value for each key
        # add the JSON-serialized data dictionary as the metadata field value
        values.append(to_JSON(datadict, sort_keys=True))
        place_holders = ', '.join(['%s'] * len(values))
        sql_fmt_str = f"insert into {schema_clean}.{table_clean} ({keys}) values ({place_holders});"
        return (sql_fmt_str, values)
    else:  # there was a mismatch of keys and values
        errMsg = f"Unable to find values for all {num_keys} required fields: {required}"
        raise errors.ProcessingError(errMsg)
Exemplo n.º 2
0
def load_aliases(alias_file, debug=False, tool_name=''):
    """
    Load field name aliases from the given alias filepath.

    :param alias_file: path to an INI-style file containing an 'aliases' section.
    :param debug: if True, print progress messages to standard error.
    :param tool_name: name of the calling tool, used in debug messages.
    :return: dictionary mapping field names to their aliases (case preserved).
    :raises ProcessingError: if the file cannot be read or lacks an 'aliases' section.
    """
    if (debug):
        print("({}): Loading from aliases file '{}'".format(
            tool_name, alias_file),
              file=sys.stderr)

    try:
        config = configparser.ConfigParser(strict=False,
                                           empty_lines_in_values=False)
        config.optionxform = lambda option: option  # preserve case of option names
        with open(alias_file) as afile:  # ensure file handle is closed on all paths
            config.read_file(afile)
    except FileNotFoundError as fnfe:
        errMsg = "Aliases file '{}' not found or not readable.".format(
            alias_file)
        raise errors.ProcessingError(errMsg) from fnfe

    try:
        aliases = config['aliases']
    except KeyError as ke:
        errMsg = "No 'aliases' section found in aliases file '{}'.".format(
            alias_file)
        raise errors.ProcessingError(errMsg) from ke

    if (debug):
        print("({}): Read {} field name aliases.".format(
            tool_name, len(aliases)),
              file=sys.stderr)

    return dict(aliases)
Exemplo n.º 3
0
    def process(self, metadata):
        """
        Perform the main work of the task and return the results as a Python data structure.
        This method overrides JWST_ObsCoreCalcTask method to use iRods file access.

        :param metadata: metadata dictionary accumulated so far; the computed
            results are added to it under the 'calculated' key.
        :return: the given metadata dictionary, augmented with a 'calculated' entry.
        :raises UnsupportedType: if the iRods file is too small to be a valid FITS file.
        :raises ProcessingError: if the iRods file cannot be found, the specified
            HDU cannot be read, or no WCS information can be extracted from it.
        """
        if (self._DEBUG):
            print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # use the specified HDU of the FITS file to compute the WCS information
        which_hdu = self.args.get('which_hdu', 0)

        # get the iRods file path argument of the file to be opened
        irff_path = self.args.get('irods_fits_file')

        try:
            # get the FITS file at the specified path
            irff = self.irods.getf(irff_path, absolute=True)

            # sanity check on the given FITS file
            if (irff.size < FITS_BLOCK_SIZE):
                errMsg = "Skipping file too small to be a valid FITS file: '{}'".format(
                    irff_path)
                raise errors.UnsupportedType(errMsg)

            if (self._DEBUG):
                print("({}): Reading iRods FITS file '{}'.".format(
                    self.TOOL_NAME, irff_path),
                      file=sys.stderr)

            # try to get the specified header and read WCS info from it
            header = self.irods.get_header(irff, which_hdu)
            if (header):
                wcs_info = self.irods.get_WCS(header)
            else:  # unable to read the specified header
                errMsg = "Unable to find or read HDU {} of FITS file '{}'.".format(
                    which_hdu, irff_path)
                raise errors.ProcessingError(errMsg)

        except DataObjectDoesNotExist as dodne:
            errMsg = "Unable to find the specified iRods FITS file '{}'.".format(
                irff_path)
            raise errors.ProcessingError(errMsg)

        except OSError as oserr:
            errMsg = "Unable to read WCS info from iRods FITS file '{}': {}.".format(
                irff_path, oserr)
            raise errors.ProcessingError(errMsg)

        # check that we got the WCS information from the file
        # (wcs_info is always bound here: every failing path above raises)
        if (wcs_info is None):
            errMsg = "No WCS info found in iRods FITS file '{}'.".format(
                irff_path)
            raise errors.ProcessingError(errMsg)

        # try to produce values for each of the desired result fields
        calculated = self.calculate_results(wcs_info, metadata)
        metadata['calculated'] = calculated  # add calculations to metadata

        return metadata  # return the results of processing
Exemplo n.º 4
0
    def process(self, _):
        """
        Perform the main work of the task and return the results as a Python data structure.

        :param _: ignored (this task does not use prior metadata).
        :return: dictionary with 'file_info' and 'headers' entries for the FITS file.
        :raises UnsupportedType: if the FITS file has no image data in its primary HDU.
        :raises ProcessingError: if the FITS file cannot be read.
        """
        if (self._DEBUG):
            print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # process the given, already validated FITS file
        fits_file = self.args.get('fits_file')
        ignore_list = self.args.get('ignore_list') or fits_utils.FITS_IGNORE_KEYS
        which_hdu = self.args.get('which_hdu', 0)

        try:
            with fits.open(fits_file) as hdus_list:
                if (not fits_utils.has_image_data(hdus_list)):
                    errMsg = f"Skipping FITS file '{fits_file}': no image data in primary HDU"
                    raise errors.UnsupportedType(errMsg)

                hdrs = fits_utils.get_header_fields(hdus_list, which_hdu,
                                                    ignore_list)

        except OSError as oserr:
            errMsg = "Unable to read image metadata from FITS file '{}': {}.".format(
                fits_file, oserr)
            raise errors.ProcessingError(errMsg) from oserr  # preserve original cause

        metadata = dict()  # create overall metadata structure
        finfo = gather_file_info(fits_file)
        if (finfo is not None):  # add common file information
            metadata['file_info'] = finfo
        if (hdrs is not None):  # add the headers to the metadata
            metadata['headers'] = hdrs
        return metadata  # return the results of processing
Exemplo n.º 5
0
    def update_metadata(self, imd_path, sink_data, remove_only=False):
        """
        Attach or remove the items in the given data dictionary to/from the iRods file
        at the specified path. If the remove_only flag is True, file metadata items with
        keys matching input item keys are removed from the iRods file.

        :param imd_path: absolute iRods path of the file whose metadata is altered.
        :param sink_data: dictionary of metadata items to attach or remove.
        :param remove_only: if True, remove matching items instead of attaching them.
        :raises ProcessingError: if the iRods metadata operation fails for any reason.
        """
        if (self._DEBUG):
            print(
                f"({self.TOOL_NAME}.update_metadata): imd_path={imd_path}, remove_only={remove_only} metadata={sink_data}",
                file=sys.stderr)

        try:
            if (remove_only):
                # try to remove the specified metadata from the iRods file node
                self.irods.remove_metaf(imd_path, sink_data, absolute=True)
                action = 'removed from'
            else:
                # try to attach the given metadata to the iRods file node
                self.irods.put_metaf(imd_path, sink_data, absolute=True)
                action = 'attached to'

        # NOTE(review): Exception already covers NetworkException (assuming it
        # subclasses Exception), so the tuple appears redundant -- confirm.
        except (NetworkException, Exception) as ex:
            errMsg = f"Unable to alter the metadata of the iRods file at '{imd_path}'. Exception: {ex}"
            raise errors.ProcessingError(errMsg) from ex  # preserve original cause

        if (self._VERBOSE):
            print(
                f"({self.TOOL_NAME}): Metadata {action} iRods file '{imd_path}'",
                file=sys.stderr)
Exemplo n.º 6
0
    def output_results(self, metadata):
        """
        Output the given metadata in the configured output format.

        :param metadata: dictionary of metadata to output.
        :raises ProcessingError: if an invalid output format is configured.
        """
        genfile = self.args.get('gen_file_path')
        outfile = self.args.get('output_file')
        out_fmt = self.args.get('output_format') or DEFAULT_OUTPUT_FORMAT

        if (out_fmt == 'json'):
            if (genfile):  # if generating the output filename/path
                file_info = md_utils.get_file_info(metadata)
                fname = file_info.get(
                    'file_name') if file_info else "NO_FILENAME"
                outfile = self.gen_output_file_path(fname, out_fmt,
                                                    self.TOOL_NAME)
                self.output_JSON(metadata, outfile)
            elif (outfile is not None):  # else if using the given filepath
                self.output_JSON(metadata, outfile)
            else:  # else using standard output
                self.output_JSON(metadata)

        else:
            # fixed: error message previously named the wrong method ('process')
            errMsg = "({}.output_results): Invalid output format '{}'.".format(
                self.TOOL_NAME, out_fmt)
            raise errors.ProcessingError(errMsg)

        if (self._VERBOSE):
            out_dest = outfile if (outfile) else STDOUT_NAME
            print("({}): Results output to '{}'".format(
                self.TOOL_NAME, out_dest),
                  file=sys.stderr)
Exemplo n.º 7
0
    def input_data(self):
        """
        Read data from a file or stream and return it as a Python data structure.

        :return: the data read from the configured input file (or standard input).
        :raises ProcessingError: if an invalid input format is configured.
        """
        if (self._DEBUG):
            print("({}.input_data): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # process the given, already validated input file
        input_file = self.args.get('input_file')
        if (self._VERBOSE):
            if (input_file is None):
                print("({}): Reading data from {}".format(
                    self.TOOL_NAME, STDIN_NAME),
                      file=sys.stderr)
            else:
                print("({}): Reading data file '{}'".format(
                    self.TOOL_NAME, input_file),
                      file=sys.stderr)

        input_format = self.args.get('input_format') or DEFAULT_INPUT_FORMAT
        if (input_format == 'json'):
            data = self.input_JSON(input_file)
        else:  # currently, no other input formats
            # fixed: error message previously named the wrong method ('process')
            errMsg = "({}.input_data): Invalid input format '{}'.".format(
                self.TOOL_NAME, input_format)
            raise errors.ProcessingError(errMsg)

        return data  # return the input data
    def output_results(self, indata):
        """
        Store the given data into the configured database OR just output SQL
        to do so, depending on the 'output-only' flag.

        :param indata: metadata dictionary with file info and catalog table data.
        :raises ProcessingError: if the named catalog table does not exist.
        """
        if (self._DEBUG):
            print("({}.output_results): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # load the database configuration from a given or default file path
        dbconfig_file = self.args.get('dbconfig_file') or DEFAULT_DBCONFIG_FILEPATH
        dbconfig = self.load_sql_db_config(dbconfig_file)

        # check table name to see if it is still available in the database
        catalog_table = self.args.get('catalog_table')
        if (not self.table_exists(dbconfig, catalog_table)):
            errMsg = "Catalog table to fill '{}' does not exist.".format(catalog_table)
            raise errors.ProcessingError(errMsg)

        # file information is needed by the SQL generation methods below
        file_info = md_utils.get_file_info(indata)

        # read the catalog table data from the input data structure
        data = md_utils.get_data(indata)

        # decide whether we are just writing SQL or filling the table in the DB
        sql_only = self.args.get('output_only')
        if (sql_only):                      # if just outputting SQL
            self.write_table(dbconfig, data, catalog_table, file_info)
        else:                               # else filling the table in the database
            self.fill_table(dbconfig, data, catalog_table)
Exemplo n.º 9
0
    def output_results(self, metadata):
        """
        Attach the data extracted from the given metadata to a specified iRods
        file (or remove matching items from it), OR just output the data,
        depending on the 'output_only' flag.

        NOTE(review): the original summary mentioned storing to a database, but
        the code below only alters iRods file metadata or delegates output to
        the superclass -- confirm intended description.

        :param metadata: metadata dictionary from which the output data is extracted.
        :raises ProcessingError: if no annotatable iRods file path is specified
            or the target iRods file cannot be found.
        """
        if (self._DEBUG):
            print("({}.output_results): ARGS={}".format(
                self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # get the iRods file path argument of the file to be annotated
        imd_path = self.args.get(
            'irods_md_file',
            self.args.get('irods_fits_file'))  # default is iRods input file

        if ((imd_path is None) or (not imd_path.strip())):
            errMsg = "A full iRods path to an annotatable iRods file must be specified."
            raise errors.ProcessingError(errMsg)

        # check the iRods metadata target file path for validity
        try:
            self.irods.getf(imd_path, absolute=True)

        except (CollectionDoesNotExist, DataObjectDoesNotExist, NoResultFound):
            errMsg = "Unable to find iRods file for metadata alteration at '{}'.".format(
                imd_path)
            raise errors.ProcessingError(errMsg)

        # extract the data to be output from the given metadata
        sink_data = self.get_data_for_output(metadata)

        # When adding/replacing metadata, skip items in the skip list,
        # otherwise do not skip any items: remove all user specified items
        remove_only = self.args.get('remove_only') or False
        if (not remove_only):
            remove_entries(sink_data, ignore=self.skip_list)

        # decide whether we are changing metadata in iRods or just outputting it
        output_only = self.args.get('output_only')
        if (not output_only):  # if storing metadata to iRods
            self.update_metadata(imd_path, sink_data, remove_only)

        else:  # else just outputting metadata
            oodata = dict()
            oodata['file_info'] = md_utils.get_file_info(metadata)
            oodata['to_sink'] = sink_data
            super().output_results(oodata)
Exemplo n.º 10
0
    def load_sql_db_config(self, dbconfig_file):
        """
        Load the database configuration from the given filepath. Returns a dictionary
        of database configuration parameters.

        :param dbconfig_file: path to an INI-style DB configuration file containing
            a 'db_properties' section that must include a 'db_uri' parameter.
        :return: dictionary of database configuration parameters.
        :raises ProcessingError: if the file cannot be read, has no 'db_properties'
            section, or the section lacks a 'db_uri' parameter.
        """
        if (self._DEBUG):
            print("({}): Reading DB configuration file '{}'".format(
                self.TOOL_NAME, dbconfig_file),
                  file=sys.stderr)

        try:
            config = configparser.ConfigParser(
                interpolation=configparser.ExtendedInterpolation(),
                strict=False,
                empty_lines_in_values=False)
            with open(dbconfig_file) as cfile:  # ensure file handle is closed
                config.read_file(cfile)  # try to read the DB config file
        except FileNotFoundError as fnfe:
            errMsg = "DB configuration file '{}' not found or not readable.".format(
                dbconfig_file)
            raise errors.ProcessingError(errMsg) from fnfe

        try:
            dbconfig = dict(
                config['db_properties'])  # try to fetch DB properties
        except KeyError as ke:
            e1 = 'DB storage specified but no database parameters (db_properties) found'
            errMsg = "{} in DB configuration file '{}'.".format(
                e1, dbconfig_file)
            raise errors.ProcessingError(errMsg) from ke

        if (dbconfig.get('db_uri') is None):
            e1 = 'DB storage specified but no database URI (db_uri) parameter found'
            errMsg = "{} in DB configuration file '{}'.".format(
                e1, dbconfig_file)
            raise errors.ProcessingError(errMsg)

        if (self._DEBUG):
            print("({}): Read {} DB configuration properties.".format(
                self.TOOL_NAME, len(dbconfig)),
                  file=sys.stderr)

        return dbconfig
Exemplo n.º 11
0
def check_missing_parameters(config, required=REQUIRED_DB_PARAMETERS):
    """
    Verify that every database parameter required by this module is present
    in the given configuration dictionary.

    :raises ProcessingError if any required database parameter is absent.
    """
    absent = missing_entries(config, required)
    if (not absent):
        return  # all required parameters are present
    raise errors.ProcessingError(f"Missing required parameters: {absent}")
Exemplo n.º 12
0
def clean_id(identifier, allowed=DB_ID_CHARS):
    """
    Sanitize a simple SQL identifier by keeping only the allowed characters.
    NOTE: this handles simple SQL identifiers only; it is NOT a general
    defense against SQL injection attacks.
    """
    if (not identifier):  # reject empty string or None up front
        errMsg = "Identifier to be cleaned cannot be empty or None."
        raise errors.ProcessingError(errMsg)
    return keep_characters(identifier, allowed)
Exemplo n.º 13
0
 def load_fields_info(self, fields_file):
     """
     Load the fields info dictionary from the given filepath and return it.
     The fields info file is assumed to define a single dictionary in TOML format.

     :param fields_file: path to a TOML file defining the fields info dictionary.
     :return: the fields info dictionary loaded from the file.
     :raises ProcessingError: if the file cannot be found, read, or parsed.
     """
     try:
         # load fields info file as a dictionary
         return toml.load(fields_file)
     except Exception as ex:
         errMsg = "Field Information file '{}' not found or not readable.".format(
             fields_file)
         # chain the original exception so parse errors remain diagnosable
         raise errors.ProcessingError(errMsg) from ex
Exemplo n.º 14
0
    def select_data_for_output(self, metadata):
        """
        Choose the subset of the given metadata that should be output.
        Currently selects (a copy of) the entire 'calculated' dictionary.
        """
        results = md_utils.get_calculated(metadata)
        if (not results):
            errMsg = "The 'calculated' data, required by this program, is missing from the input."
            raise errors.ProcessingError(errMsg)

        return results.copy()  # return a copy as the selected dataset
Exemplo n.º 15
0
    def get_data_for_output(self, metadata):
        """
        Pull the data destined for output out of the given metadata.
        At present this is simply a copy of the 'calculated' dictionary.
        """
        calculated = md_utils.get_calculated(metadata)
        if (calculated):
            return calculated.copy()  # hand back a copy of the calculated data

        errMsg = "The 'calculated' data, required by this program, is missing from the input."
        raise errors.ProcessingError(errMsg)
Exemplo n.º 16
0
def fill_table(dbconfig, data, catalog_table):
    """
    Bulk-insert the given list of data row lists into the named catalog table,
    using the given DB configuration parameters.
    """
    if (not data):  # an empty data list is an error, not a no-op
        errMsg = "(fill_table): Empty data list cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    insert_template = pg_gen.gen_insert_rows(dbconfig, catalog_table)
    # the data rows are already in the correct form for bulk insertion
    insert_rows_sql(dbconfig, insert_template, data)
    return len(data)  # assume all rows correctly inserted
Exemplo n.º 17
0
 def test_pe_code(self):
     """ Exercise a ProcessingError created with an explicit error code. """
     perr = xcpt.ProcessingError(self.EMSG, self.ECODE)
     print(perr)
     print(type(perr))
     assert perr.error_code == self.ECODE

     as_dict = perr.to_dict()
     assert 'message' in as_dict
     assert 'error_code' in as_dict
     assert as_dict.get('message') == self.EMSG
     assert as_dict.get('error_code') == self.ECODE

     as_tuple = perr.to_tuple()
     assert as_tuple[0] == self.EMSG
     assert as_tuple[1] == self.ECODE
Exemplo n.º 18
0
    def output_report(self, report):
        """ Write the given list of report strings to stderr in the selected format. """
        rpt_fmt = self.args.get('report_format') or 'text'

        if (rpt_fmt == 'text'):
            for line in report:
                print(line, file=sys.stderr)

        elif (rpt_fmt == 'json'):
            self.output_JSON(report, sys.stderr)

        else:
            raise errors.ProcessingError(f"Invalid report format '{rpt_fmt}'.")
Exemplo n.º 19
0
    def process(self, metadata):
        """
        Perform the main work of the task on the given metadata and return the results
        as a Python data structure.

        :param metadata: metadata dictionary accumulated so far; the computed
            results are added to it under the 'calculated' key.
        :return: the given metadata dictionary, augmented with a 'calculated' entry.
        :raises ProcessingError: if the FITS file cannot be read or has no WCS info.
        """
        if (self._DEBUG):
            print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # process the given, already validated FITS file
        fits_file = self.args.get('fits_file')
        if (self._DEBUG):
            print("({}): Reading FITS file '{}'".format(
                self.TOOL_NAME, fits_file),
                  file=sys.stderr)

        # compute the WCS information from the specified HDU of the FITS file
        which_hdu = self.args.get('which_hdu', 0)

        try:
            with fits.open(fits_file) as hdus_list:
                wcs_info = fits_utils.get_WCS(hdus_list, which_hdu)

        except OSError as oserr:
            errMsg = "Unable to read WCS info FITS file '{}': {}.".format(
                fits_file, oserr)
            raise errors.ProcessingError(errMsg) from oserr  # preserve original cause

        if (wcs_info is None):
            errMsg = "No WCS info found in FITS file '{}'.".format(fits_file)
            raise errors.ProcessingError(errMsg)

        # try to produce values for each of the desired result fields
        calculated = self.calculate_results(wcs_info, metadata)
        metadata['calculated'] = calculated  # add calculations to metadata

        return metadata  # return the results of processing
Exemplo n.º 20
0
def calc_wcs_coordinates(wcs_info, metadata, calculations):
    """
    Extract the WCS coordinates for the reference pixel of the current image file.
    Sets both s_ra and s_dec fields simultaneously when either field is processed.
    This method assumes that neither s_ra nor s_dec fields have a value yet and it will
    overwrite current values for both s_ra and s_dec if that assumption is not valid.

    :param wcs_info: WCS object whose .wcs.ctype axis labels identify the RA/DEC axes.
    :param metadata: metadata dictionary whose 'headers' entry supplies CRVAL1/CRVAL2.
    :param calculations: dictionary into which the s_ra and s_dec values are stored.
    :raises ProcessingError: if CRVAL1/CRVAL2 are non-numeric or the RA/DEC axes
        cannot be identified from the ctype labels.
    """
    if (('s_ra' in calculations)
            and ('s_dec' in calculations)):  # avoid repetition
        return  # exit out now

    md = metadata.get('headers')
    if (md is None):
        return  # exit out now

    crval1 = md.get('CRVAL1')
    crval2 = md.get('CRVAL2')
    if ((crval1 is None) or (crval2 is None)):
        return  # missing reference values: nothing to extract

    try:
        val1 = float(crval1)
        val2 = float(crval2)
    except ValueError as ve:  # on string to number conversion
        errMsg = "(calc_wcs_coords) Unable to convert CRVAL1 or CRVAL2 to a number."
        raise errors.ProcessingError(errMsg) from ve

    # only consult the WCS axis labels once the values are known to be usable
    ctype = list(wcs_info.wcs.ctype)
    if (ctype[0].startswith('RA')):  # if CRVAL1 has the RA value
        calculations['s_ra'] = val1  # put CRVAL1 value into s_ra
        calculations['s_dec'] = val2  # put CRVAL2 value into s_dec
    elif (ctype[0].startswith('DEC')):  # else if CRVAL1 has the DEC value
        calculations['s_dec'] = val1  # put CRVAL1 value into s_dec
        calculations['s_ra'] = val2  # put CRVAL2 value into s_ra
    else:
        errMsg = "(calc_wcs_coords) Unable to assign RA/DEC axes from ctype={}".format(
            ctype)
        raise errors.ProcessingError(errMsg)
Exemplo n.º 21
0
def create_table_sql(args, dbconfig, column_names, column_formats):
    """
    Build the SQL needed to create a new table with the given name, columns, and
    types, as specified by the given catalog metadata, using the given DB parameters.

    Returns a list of cleaned SQL strings to be executed to create the table.
    Raises ProcessingError if the column name or format vectors are not present in
    the input OR if the vectors are not the same size.
    """
    both_present = (column_names is not None) and (column_formats is not None)
    if (not both_present or (len(column_names) != len(column_formats))):
        errMsg = 'Column name and format lists must be the same length.'
        raise errors.ProcessingError(errMsg)

    return pg_gen.gen_create_table_sql(args, dbconfig, column_names,
                                       column_formats)
Exemplo n.º 22
0
 def test_pe(self):
     """ Exercise a ProcessingError created with only a message (default error code). """
     perr = xcpt.ProcessingError(self.EMSG)
     print(perr)
     print(type(perr))
     assert perr.error_code == xcpt.ProcessingError.ERROR_CODE

     as_dict = perr.to_dict()
     assert 'message' in as_dict
     assert 'error_code' in as_dict
     assert as_dict.get('message') == self.EMSG
     assert as_dict.get('error_code') == xcpt.ProcessingError.ERROR_CODE

     as_tuple = perr.to_tuple()
     assert as_tuple[0] == self.EMSG
     assert as_tuple[1] == xcpt.ProcessingError.ERROR_CODE

     as_str = str(perr)
     print(as_str)
     assert str(xcpt.ProcessingError.ERROR_CODE) in as_str
     assert 'wrong' in as_str
Exemplo n.º 23
0
    def process(self, _):
        """
        Perform the main work of the task and return the results as a Python data structure.

        :param _: ignored (this task does not use prior metadata).
        :return: dictionary with 'file_info', 'headers', 'column_info', 'meta',
            and 'data' entries describing the FITS catalog.
        :raises UnsupportedType: if the FITS file has no catalog data in HDU 1.
        :raises ProcessingError: if the FITS file cannot be read.
        """
        if (self._DEBUG):
            print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # process the given, already validated FITS file
        fits_file = self.args.get('fits_file')
        ignore_list = self.args.get('ignore_list') or fits_utils.FITS_IGNORE_KEYS
        catalog_hdu = self.args.get('catalog_hdu', 1)

        try:
            with fits.open(fits_file) as hdus_list:
                if (not fits_utils.has_catalog_data(hdus_list)):
                    errMsg = f"Skipping FITS file '{fits_file}': no catalog in HDU 1"
                    raise errors.UnsupportedType(errMsg)
                hdrs = fits_utils.get_header_fields(hdus_list, catalog_hdu,
                                                    ignore_list)
                cinfo = fits_utils.get_column_info(hdus_list, catalog_hdu)

                fits_rec = hdus_list[catalog_hdu].data
                data = fits_utils.rows_from_data(fits_rec)
                table = Table.read(hdus_list, hdu=catalog_hdu)
                meta = fits_utils.get_table_meta_attribute(table)

        except OSError as oserr:
            errMsg = "Unable to read catalog data from FITS file '{}': {}.".format(
                fits_file, oserr)
            raise errors.ProcessingError(errMsg) from oserr  # preserve original cause

        outdata = dict()  # create overall output structure
        finfo = gather_file_info(fits_file)
        if (finfo is not None):  # add common file information
            outdata['file_info'] = finfo
        if (hdrs is not None):  # add the headers to the output
            outdata['headers'] = hdrs
        if (cinfo is not None):  # add column metadata to the output
            outdata['column_info'] = cinfo
        outdata['meta'] = meta  # add extra table metadata to the output
        outdata['data'] = data  # add the data table to the output

        return outdata  # return the results of processing
Exemplo n.º 24
0
def fits_format_to_sql(tform):
    """
    Translate the given FITS column format field into the matching SQL type declaration.

    :param tform: a FITS column format field to be translated.
    :return an SQL type declaration string corresponding to the given FITS format code.
    :raises ProcessingError if tform specifies a type not supported by the database.
    """
    # multi-character formats are keyed by their first (type) character only
    fmt_code = tform[0:1] if (tform and len(tform) > 1) else tform

    sql_decl = _FITS_FORMAT_TO_SQL.get(fmt_code, UNSUPPORTED)
    if (sql_decl == UNSUPPORTED):
        raise errors.ProcessingError(
            f"FITS data column format '{tform}' is not supported.")
    return sql_decl
Exemplo n.º 25
0
def gen_column_decls_sql(column_names, column_formats):
    """
    Build the SQL column declarations for a table from parallel lists of column
    names and FITS format specifiers.

    :param column_names: a list of column name strings
    :param column_formats: a list of FITS format specifiers strings

    :return a list of SQL declaration strings for the table columns (no trailing commas!)
    :raises ProcessingError if the given column name and format lists are not the same size.
    """
    if (len(column_names) != len(column_formats)):
        raise errors.ProcessingError(
            "Column name and format lists must be the same length.")

    sql_types = [fits_format_to_sql(fmt) for fmt in column_formats]
    cleaned_names = [clean_id(name) for name in column_names]
    # pair each cleaned column name with its translated SQL type
    return [f"{name} {decl}" for name, decl in zip(cleaned_names, sql_types)]
Exemplo n.º 26
0
    def check_missing(self, metadata):
        """
        Scan the results part of the given metadata for fields without values.
        Returns a list of warning message strings, to be later formatted and output.
        """
        calculated = md_utils.get_calculated(metadata)
        if (not calculated):
            errMsg = "The 'calculated' data, required by this program, is missing from the input."
            raise errors.ProcessingError(errMsg)

        fields_info = md_utils.get_fields_info(metadata)
        report = []
        for fld_name, props in fields_info.items():
            if (fld_name in calculated):
                continue  # this field has a value: nothing to report
            kind = 'Required' if props.get('required') else 'Optional'
            report.append(
                "WARNING: {0} field '{1}' still does not have a value.".format(
                    kind, fld_name))

        return report  # return list of message strings
Exemplo n.º 27
0
def gen_insert_row(dbconfig, datadict, table_name):
    """
    Return appropriate data structures for inserting the given data dictionary
    into a database via a database access library. Currently using Psycopg2,
    so return a tuple of an INSERT template string and a sequence of values.

    :param dbconfig: dictionary of DB configuration parameters (uses 'db_schema_name').
    :param datadict: dictionary mapping column names to the values to be inserted.
    :param table_name: name of the table to insert into.
    :return: tuple of (INSERT template string, sequence of values).
    :raises ProcessingError: if the given data dictionary is empty.
    """
    if (not datadict):  # sanity check
        errMsg = "(gen_insert_row): Empty data dictionary cannot be inserted into table."
        raise errors.ProcessingError(errMsg)

    schema_clean = clean_id(dbconfig.get('db_schema_name'))
    table_clean = clean_id(table_name)

    keys_clean = [clean_id(key) for key in datadict.keys()]
    keys = ', '.join(keys_clean)

    values = list(datadict.values())
    place_holders = ', '.join(['%s'] * len(values))  # one placeholder per value
    sql_fmt_str = f"insert into {schema_clean}.{table_clean} ({keys}) values ({place_holders});"
    return (sql_fmt_str, values)
Exemplo n.º 28
0
    def output_results(self, metadata):
        """ Write the given metadata out as a pickle file. """
        genfile = self.args.get('gen_file_path')
        outfile = self.args.get('output_file')

        if (genfile):  # generate the output filename/path ourselves
            file_info = md_utils.get_file_info(metadata)
            fname = file_info.get('file_name') if file_info else "NO_FILENAME"
            outfile = self.gen_output_file_path(fname, PICKLE_EXTENSION,
                                                self.TOOL_NAME)
            self.output_pickle(metadata, outfile)
        elif (outfile is not None):  # a filepath was given: use it
            self.output_pickle(metadata, outfile)
        else:  # pickled data cannot go to standard output
            raise errors.ProcessingError(
                "Pickle cannot be written to {}.".format(STDOUT_NAME))

        if (self._VERBOSE):
            print("({}): Pickled data output to '{}'".format(
                self.TOOL_NAME, outfile),
                  file=sys.stderr)
Exemplo n.º 29
0
    def process(self, _):
        """
        Perform the main work of the task and return the results as a Python data structure.

        :param _: ignored (this task does not use prior metadata).
        :return: dictionary with 'file_info', 'irods_metadata', 'content_metadata',
            and (when readable) 'headers' entries for the iRods FITS file.
        :raises UnsupportedType: if the iRods file is too small to be a valid FITS file.
        :raises ProcessingError: if the file is missing, the selected HDU is not an
            image header, or the header cannot be read.
        """
        if (self._DEBUG):
            print("({}.process): ARGS={}".format(self.TOOL_NAME, self.args),
                  file=sys.stderr)

        # get the selection and filtering arguments
        which_hdu = self.args.get('which_hdu', 0)
        ignore_list = self.args.get(
            'ignore_list') or fits_utils.FITS_IGNORE_KEYS

        # get the iRods file path argument of the file to be opened
        irff_path = self.args.get('irods_fits_file')

        try:
            # get the FITS file at the specified path
            irff = self.irods.getf(irff_path, absolute=True)

            # sanity check on the given FITS file
            if (irff.size < FITS_BLOCK_SIZE):
                errMsg = "Skipping file too small to be a valid FITS file: '{}'".format(
                    irff_path)
                raise errors.UnsupportedType(errMsg)

            # actually read the file to get the specified header
            header = self.irods.get_header(irff, which_hdu)
            if (header):
                if (not self.irods.is_image_header(header)):
                    errMsg = "HDU {} is not an image header. Skipping FITS file '{}'.".format(
                        which_hdu, irff_path)
                    raise errors.ProcessingError(errMsg)

                # get and save some common file information
                file_info = self.irods.get_irods_file_info(irff)

                # get additional metadata ABOUT the iRods file itself
                irods_metadata = self.irods.get_irods_metadata(irff)

                # get any content metadata attached to the file
                content_metadata = self.irods.get_content_metadata(irff)

                # now try to read the FITS header from the FITS file
                hdrs = fits_utils.get_fields_from_header(header, ignore_list)

            else:  # unable to read the specified header
                errMsg = "Unable to read image metadata from HDU {} of FITS file '{}'.".format(
                    which_hdu, irff_path)
                raise errors.ProcessingError(errMsg)

        except DataObjectDoesNotExist as dodne:
            errMsg = "Unable to find the specified iRods FITS file '{}'.".format(
                irff_path)
            raise errors.ProcessingError(errMsg)

        except OSError as oserr:
            errMsg = "Unable to read image metadata from iRods FITS file '{}': {}.".format(
                irff_path, oserr)
            raise errors.ProcessingError(errMsg)

        # NOTE(review): file_info/irods_metadata/content_metadata/hdrs are only
        # bound on the successful header path above; every failing path raises,
        # so they are always defined here -- verify if new handling is added.
        metadata = dict()  # create overall metadata structure
        metadata[
            'file_info'] = file_info  # add previously gathered remote file information
        metadata['irods_metadata'] = irods_metadata
        metadata['content_metadata'] = content_metadata

        if (hdrs is not None):
            metadata['headers'] = hdrs  # add the headers to the metadata
        return metadata  # return the results of processing