Example #1
    def test_complicated_groups(self):

        # Test a deeply nested field (nested within 3 group fields)
        structures = pds4_read(self.data('af.xml'), lazy_load=True, quiet=True)
        structure = structures[9]
        _check_array_equal(
            structure.field(0)[11, 2, 5, 2:5],
            [-0.52061242, -0.51312923, -0.50972084], 'float64')

        # Test two fields within one group field
        structures = pds4_read(self.data('test_group_fields.xml'),
                               lazy_load=True,
                               quiet=True)

        structure = structures[0]
        _check_array_equal(
            structure.field(0)[9, 5, 2:5],
            [331.28526671, 328.97851487, 327.87342654], 'float64')
        _check_array_equal(
            structure.field(1)[3, 7, 1:4],
            [277.80563195, 281.21064631, 279.24594501], 'float64')

        # Test three fields within one group field
        structure = structures[1]

        _check_array_equal(
            structure.field(0)[20, 7:10], [207., 208., 209.], 'float64')
        _check_array_equal(
            structure.field(1)[9, 5, 2:5],
            [331.28526671, 328.97851487, 327.87342654], 'float64')
        _check_array_equal(
            structure.field(2)[3, 7, 1:4],
            [277.80563195, 281.21064631, 279.24594501], 'float64')
Example #2
    def test_simple_groups(self):

        # Test via binary tables
        structures = pds4_read(self.data('af.xml'), lazy_load=True, quiet=True)

        # Test single nested, 1D group fields
        structure = structures[11]
        _check_array_equal(
            structure.field(10)[0], [-0.08405675, 0.60469515, -0.79200899],
            'float64')

        structure = structures[13]
        string = [
            'mvn_app_rel_150601_150607_v01.bc',
            'mvn_sc_rel_150601_150607_v01.bc '
        ]
        _check_array_equal(structure.field(-1)[0, 3:5], string, 'U32')

        # Test via delimited table
        structures = pds4_read(self.data('Product_DelimitedTable.xml'),
                               lazy_load=True,
                               quiet=True)

        # Test single nested, 1D group fields
        structure = structures[0]
        _check_array_equal(
            structure.field(-1)[-1], [5, 1, 1, 1, 1, 1, 0, 0, 0, 0], 'int8')
Example #3
    def setup(self):

        super(TestArrayDataTypes, self).setup()

        self.structures = pds4_read(self.data('test_array_data_types.xml'),
                                    lazy_load=True,
                                    quiet=True)
Example #4
def read_dat_pds4(filename, write_csv=False, quiet=True):
    """ Reads a PDS4 .dat format file, preserving column order and data type,
    except that byte order is switched to native if applicable. The .dat file
    and .xml label must exist in the same directory.
    Returns the data as a pandas DataFrame.
    """
    if filename[-4:].lower() == ".dat":
        filename = filename[:-4] + ".xml"
    if filename[-4:].lower() != ".xml":
        raise TypeError("Unknown filetype: {ext}".format(ext=filename[-4:]))
    structures = pds4_tools.pds4_read(filename, quiet=quiet)
    dat_dict = OrderedDict()
    for i in range(len(structures[0].fields)):
        name = structures[0].fields[i].meta_data["name"]
        dat_dtype = structures[0].fields[i].meta_data["data_type"]
        dtype = pds4_tools.reader.data_types.pds_to_numpy_type(dat_dtype)
        data = np.array(structures[0].fields[i], dtype=dtype)
        if (sys.byteorder == "little"
                and ">" in str(dtype)) or (sys.byteorder == "big"
                                           and "<" in str(dtype)):
            data = data.byteswap().newbyteorder()
        dat_dict[name] = data
    dataframe = pd.DataFrame(dat_dict)
    if write_csv:
        dataframe.to_csv(filename.replace(".xml", ".csv"), index=False)
    return dataframe
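
A minimal usage sketch of the function above (the .dat file name is hypothetical):

df = read_dat_pds4("example_table.dat", write_csv=True)
print(df.dtypes)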
Example #5
    def setup(self):

        super(TestTableDataTypes, self).setup()

        structures = pds4_read(self.data('test_table_data_types.xml'),
                               lazy_load=True,
                               quiet=True)
        self.table = structures[0]
Example #6
    def setup(self):

        super(TestDelimitedTable, self).setup()

        structures = pds4_read(self.data('Product_DelimitedTable.xml'),
                               lazy_load=True,
                               quiet=True)
        self.structure = structures[0]
Example #7
    def setup(self):

        super(TestCharacterTable, self).setup()

        structures = pds4_read(self.data('colors.xml'),
                               lazy_load=True,
                               quiet=True)
        self.structure = structures[0]
Example #8
    def load_file(self, filespec, numhdu=None, dstobj=None, **kwdargs):
        # create object of the appropriate type, usually
        # an AstroImage or AstroTable, by looking up the correct
        # class in self.factory_dict, under the keys 'image' or
        # 'table'
        import numpy as np
        from urllib.parse import urlparse
        from pds4_tools import pds4_read
        from .exceptions import InvalidPDS4Data

        urlinfo = urlparse(filespec)
        if urlinfo.scheme not in ['file', '']:
            raise IOError('File must be local: {}'.format(filespec))

        struct = pds4_read(urlinfo.path)

        if numhdu is None:
            # return the first table or array
            for i in range(len(struct)):
                if struct[i].is_array():
                    break
            else:
                raise InvalidPDS4Data('No image found in {}'.format(filespec))
        else:
            i = numhdu

        im = np.array(struct[i].data)

        # Ginga draws from bottom to top, left to right.  Transform
        # our data so that when it is drawn this way it is displayed
        # in the correct orientation
        disp_dir = struct[i].meta_data.display_settings['Display_Direction']
        haxis = struct[i].meta_data.get_axis_array(
            disp_dir['horizontal_display_axis'])

        # PDS4 data is Last Index Fastest and axis numbering starts at
        # 1.  Numpy arrays are also Last Index Fastest, but start at
        # 0.
        if haxis['sequence_number'] == 1:
            # Swap axes so that the horizontal axis is numpy axis 1:
            im = im.T

        hdisp_dir = disp_dir['horizontal_display_direction']
        vdisp_dir = disp_dir['vertical_display_direction']
        if 'Right to Left' in hdisp_dir:
            im = im[:, ::-1]  # invert horizontal axis
        if 'Top to Bottom' in vdisp_dir:
            im = im[::-1]  # invert vertical axis

        if dstobj is not None:
            dstobj.set_data(im)

        return im, i, None
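
The display-direction handling above can be sketched on a plain numpy array; the hard-coded transforms below stand in for settings that would normally come from the label:

import numpy as np

im = np.arange(12).reshape(3, 4)
# Horizontal display axis stored first (sequence_number == 1):
# transpose so it becomes numpy axis 1.
im = im.T
im = im[:, ::-1]  # 'Right to Left': invert the horizontal axis
im = im[::-1]     # 'Top to Bottom': invert the vertical axis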
Example #9
def process(path):
    url = "".join((ARCHIVE_PREFIX, path))
    label = pds4_read(url, lazy_load=True, quiet=True).label
    lid = label.find("Identification_Area/logical_identifier").text
    tel = lid.split(":")[5][:3].upper()
    if tel in CatalinaBigelow._telescopes:
        obs = CatalinaBigelow()
    elif tel in CatalinaLemmon._telescopes:
        obs = CatalinaLemmon()
    elif tel in CatalinaKittPeak._telescopes:
        obs = CatalinaKittPeak()
    else:
        raise ValueError(f"Unknown telescope {tel}")

    obs.product_id = lid
    obs.mjd_start = Time(
        label.find(
            "Observation_Area/Time_Coordinates/start_date_time").text).mjd
    obs.mjd_stop = Time(
        label.find(
            "Observation_Area/Time_Coordinates/stop_date_time").text).mjd
    obs.exposure = round((obs.mjd_stop - obs.mjd_start) * 86400, 3)

    survey = label.find(".//survey:Survey")
    ra, dec = [], []
    for corner in ("Top Left", "Top Right", "Bottom Right", "Bottom Left"):
        coordinate = survey.find(
            "survey:Image_Corners"
            f"/survey:Corner_Position[survey:corner_identification='{corner}']"
            "/survey:Coordinate")
        ra.append(float(coordinate.find("survey:right_ascension").text))
        dec.append(float(coordinate.find("survey:declination").text))
    obs.set_fov(ra, dec)

    maglimit = survey.find(
        "survey:Limiting_Magnitudes"
        "/survey:Percentage_Limit[survey:Percentage_Limit='50']"
        "/survey:limiting_magnitude")
    if maglimit is not None:
        obs.maglimit = float(maglimit.text)

    return obs
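
A minimal sketch of the label lookups used above, assuming a local label file (the path is hypothetical):

label = pds4_read("example_label.xml", lazy_load=True, quiet=True).label
lid = label.find("Identification_Area/logical_identifier").text
start = label.find("Observation_Area/Time_Coordinates/start_date_time").text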
Example #10
def inventory(base_path):
    """Iterate over all files of interest.

    Returns
    -------
    labels : iterator of tuples
        Path and pds4_tools label object.

    """

    logger = logging.getLogger("add-spacewatch")
    inventory_fn = f"{base_path}/gbo.ast.spacewatch.survey/data/collection_gbo.ast.spacewatch.survey_data_inventory.csv"

    if not os.path.exists(inventory_fn):
        raise Exception(f'Missing inventory list {inventory_fn}')

    # Read in all relevant LIDs from the inventory.
    lids = set()
    with open(inventory_fn, 'r') as inf:
        for line in inf:
            if not line.startswith(
                    'P,urn:nasa:pds:gbo.ast.spacewatch.survey:data:sw_'):
                continue
            if '.fits' not in line:
                continue

            lid = line[2:-6]
            lids.add(lid)

    # search directory-by-directory for labels with those LIDs
    for fn in iglob(
            f"{base_path}/gbo.ast.spacewatch.survey/data/20*/*/*/*.xml"):
        label = pds4_read(fn, lazy_load=True, quiet=True).label
        lid = label.find("Identification_Area/logical_identifier").text
        if lid in lids:
            lids.remove(lid)
            yield fn, label

    # did we find all the labels?
    if len(lids) > 0:
        logger.error(f'{len(lids)} LIDs were not found.')
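
A minimal usage sketch (the base path is hypothetical):

for fn, label in inventory("/archives/spacewatch"):
    print(fn, label.find("Identification_Area/logical_identifier").text)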
Example #11
import png
import glob
import numpy as np
from pds4_tools import pds4_read

for fn in glob.glob('*.*L'):                        # iterate over matching files
    data = pds4_read(fn, quiet=True)                # read the PDS4 product
    img = np.array(data[0].data)                    # convert to a numpy array
    img = img.reshape(-1, 2352 * 3)                 # reshape to rows of RGB triples
    img16 = (img * 256).astype(np.uint16)           # scale to 16 bits
    png.from_array(img16, 'RGB').save(f"{fn}.png")  # write as PNG
Example #12
# Table 1, column 1
# group_data1.ravel()[::2].reshape(21, 10, 5)
#
# Table 1, column 2
# group_data1.ravel()[1::2].reshape(21, 10, 5)
#
# Table 2, column 1
# group_data2.ravel()[::11].reshape(21, 10)
#
# Table 2, column 2
# same as table 1, column 1
#
# Table 2, column 3
# same as table 1, column 2

af_path = os.path.join(os.path.dirname(__file__), '..', 'data/af.xml')
structures = pds4_read(af_path)
original_data = structures[9]['PIXEL_CORNER_LON']

# Create a table with two columns, each of shape (21,10,5)
group_data1 = np.asarray([original_data.ravel(),
                          original_data.ravel()]).reshape(21, 10, 10)

# Create a table with three columns, where the first has shape (21,10) and the other two have shapes (21,10,5)
group_data2 = group_data1.copy().ravel()
group_data2 = np.insert(group_data2, list(range(0, 21 * 10 * 10, 10)),
                        list(range(0, 210))).reshape(21, 10, 11)

# Ensure data is MSB
if sys.byteorder == 'little':
    group_data1.byteswap(inplace=True)
    group_data2.byteswap(inplace=True)
Example #13
    def setup(self):

        super(TestArrayStructure, self).setup()

        structures = pds4_read(self.data('af.xml'), lazy_load=True, quiet=True)
        self.structure = structures[1]
Example #14
# Table 1, column 1
# group_data1.ravel()[::2].reshape(21, 10, 5)
#
# Table 1, column 2
# group_data1.ravel()[1::2].reshape(21, 10, 5)
#
# Table 2, column 1
# group_data2.ravel()[::11].reshape(21, 10)
#
# Table 2, column 2
# same as table 1, column 1
#
# Table 2, column 3
# same as table 1, column 2

af_path = os.path.join(os.path.dirname(__file__), '..', 'data/af.xml')
structures = pds4_read(af_path, quiet=True)
original_data = structures[9]['PIXEL_CORNER_LON']

# Create a table with two columns, each of shape (21,10,5)
group_data1 = np.asarray([original_data.ravel(),
                          original_data.ravel()]).reshape(21, 10, 10)

# Create a table with three columns, where the first has shape (21,10) and the other two have shapes (21,10,5)
group_data2 = group_data1.copy().ravel()
group_data2 = np.insert(group_data2, list(range(0, 21 * 10 * 10, 10)),
                        list(range(0, 210))).reshape(21, 10, 11)

# Ensure data is MSB
if sys.byteorder == 'little':
    group_data1.byteswap(inplace=True)
    group_data2.byteswap(inplace=True)
Example #15
    def setup(self):

        super(TestBinaryTable, self).setup()

        structures = pds4_read(self.data('af.xml'), lazy_load=True, quiet=True)
        self.structure = structures[3]
Example #16
    def setup(self):

        super(TestStructureList, self).setup()
        self.structures = pds4_read(self.data('af.xml'),
                                    lazy_load=True,
                                    quiet=True)
Example #17
def load_hierarchy(path):
    xml_paths = []
    for dirpath, dirname, files in os.walk(path):
        for filepath in fnmatch.filter(files, '*.xml'):
            xml_paths.append(os.path.join(dirpath, filepath))

    print('Processing', len(xml_paths), 'XML files')
    num_records = 0
    for xml_path in xml_paths:
        struct_list = pds4_read(xml_path)

        print('*' * 80)
        print(xml_path)
        print('Processing', len(struct_list), 'structs')
        # See SBN dev wiki for pds4_read usage:
        # http://sbndev.astro.umd.edu/wiki/Python_PDS4_Tools#pds4_read
        for struct in struct_list:
            print('-' * 80)
            if type(struct) != TableStructure:
                print('Unknown struct type encountered:', type(struct))
                continue

            # xpath tester: https://codebeautify.org/Xpath-Tester#
            fields = [
                elt.text
                for elt in struct.label.findall('.//Field_Character/name')
            ]
            formats = [
                elt.text for elt in struct.label.findall(
                    './/Field_Character/field_format')
            ]

            # See astropy docs for writing fits tables:
            # http://docs.astropy.org/en/stable/io/fits/#creating-a-new-table-file
            cols = []
            for field, fmt in zip(fields, formats):
                try:
                    cols.append(
                        fits.Column(name=field,
                                    format='E',
                                    array=struct[field]))
                except ValueError:
                    pass

            if len(cols) < 1:
                continue

            print('Writing fits file...')
            coldef = fits.ColDefs(cols)
            tbhdu = fits.BinTableHDU.from_columns(coldef)

            prihdr = fits.Header()
            prihdr['COMMENT'] = 'Converted by PDSKit from %s' % xml_path
            prihdu = fits.PrimaryHDU(header=prihdr)

            thdulist = fits.HDUList([prihdu, tbhdu])

            out_dirpath = './out/%s' % os.path.dirname(xml_path)
            if not os.path.exists(out_dirpath):
                os.makedirs(out_dirpath)

            out_filename = os.path.basename(xml_path).split('.')[0]
            out_path = '%s.fits' % os.path.join(out_dirpath, out_filename)
            if not os.path.exists(out_path):
                thdulist.writeto(out_path)

            num_records += len(struct.data)
    print('Total number of records:', num_records)
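
A minimal usage sketch (the root directory is hypothetical):

load_hierarchy('./pds_data')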
Example #18
def read_table(label_file, table_name=None, index_col=None, quiet=True):
    """
    Reads data from a PDS4 product using pds4_tools. Data are
    converted to a Pandas DataFrame and any columns that are
    using PDS4 time data types are converted to Timestamps.

    By default the first table is read, otherwise the
    table_name can be used to specify.

    If index_col is set, this field will be used as an index in 
    the returned pandas DataFrame, otherwise if a time field
    is present this will be used.

    NOTE: only simple 2D tables can currently be read. Group
    fields are skipped with a warning message!
    """

    data = pds4_read(label_file, quiet=quiet)
    labelpath = Path(label_file)

    num_arrays = 0
    tables = []

    for structure in data.structures:
        if structure.is_array():
            num_arrays += 1
        elif structure.is_table():
            tables.append(structure.id)

    if len(tables) == 0:
        log.error('no tables found in this product')
        return None

    if not quiet:
        log.info('product {:s} has {:d} tables and {:d} arrays'.format(
            labelpath.name, len(tables), num_arrays))

    if table_name is not None:
        if table_name in tables:
            table = data[table_name]
        else:
            log.error(
                'table name {:s} not found in product'.format(table_name))
            return None
    else:
        table = data[tables[0]]

    if not quiet:
        log.info('using table {:s}'.format(table.id))

    # clunky way to get the names of group fields to ignore for now
    table_manifest = TableManifest.from_label(data[table.id].label)

    time_cols = []
    fields = []
    group_fields = []

    for i in range(len(table_manifest)):
        if table_manifest[i].is_group():
            continue
        name = table_manifest[i].full_name()
        if table_manifest.get_parent_by_idx(i):
            group_fields.append(table_manifest[i].full_name())
            continue
        fields.append(name)

        data_type = table_manifest[i]['data_type']
        if 'Date' in data_type:
            time_cols.append(name)

        # TODO: fix nested tables (group fields)
        # TODO: fix handling of masked arrays (in particular missing vals in CSVs trigger this)

    data = pds4_df(table.data, columns=fields)
    for field in fields:
        data[field] = table.data[field]

    for group_field in group_fields:
        field_name = group_field.split(',')[1].strip()
        field_data = table[group_field]
        if field_data.shape[0] != len(data):
            log.warning(
                'group field length does not match table length - skipping!')
            continue
        data[field_name] = None
        for idx in range(len(data)):
            data[field_name].iat[idx] = field_data[idx]

    path, filename = os.path.split(label_file)
    data.path = path
    data.filename = filename

    for col in time_cols:
        data[col] = pd.to_datetime(data[col]).dt.tz_localize(None)

    if index_col is not None:
        if index_col in fields:
            data.set_index(index_col, drop=True, inplace=True)
            log.info('data indexed with field {:s}'.format(index_col))
        else:
            log.warning('requested index field {:s} not found'.format(index_col))
            index_col = None

    if index_col is None:
        if len(time_cols) == 0:
            log.warning(
                'no time-based columns found, returned data will not be time-indexed'
            )
        elif len(time_cols) == 1:
            data.set_index(time_cols[0], drop=True, inplace=True)
            log.info('data time-indexed with field {:s}'.format(time_cols[0]))
        else:
            if 'TIME_UTC' in data.columns:
                data.set_index('TIME_UTC', drop=True, inplace=True)
                log.info('data time-indexed with field TIME_UTC')
            else:
                data.set_index(time_cols[0], drop=True, inplace=True)
                log.info('data time-indexed with field {:s}'.format(
                    time_cols[0]))

    return data
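
A minimal usage sketch of read_table (the label path and index field are hypothetical):

df = read_table("example_product.xml", index_col="TIME_UTC")
print(df.head())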