def _ipac_header_cells(line):
    '''
    Return the cells of an IPAC header line, i.e. the text found between
    consecutive pipe symbols, with all spaces removed. Anything before the
    first pipe or after the last one is discarded.
    '''
    return line.replace(" ", "").split("|")[1:-1]


def _ipac_smart_dtype(values, type_name, default):
    '''
    Return the smallest numpy type whose range contains every entry of
    *values* for a column declared with the IPAC type *type_name*.

    *default* is returned when the column type is not numeric, when there
    are no usable values, or when no candidate type is wide enough.
    '''

    if type_name in ('i', 'int', 'integer'):
        convert, limits = int, np.iinfo
        candidates = [np.uint8, np.int8, np.uint16, np.int16,
                      np.uint32, np.int32, np.uint64, np.int64]
    elif type_name == 'long':
        convert, limits = int, np.iinfo
        candidates = [np.uint64, np.int64]
    elif type_name in ('float', 'real'):
        convert, limits = float, np.finfo
        candidates = [np.float32, np.float64]
    else:
        return default

    # The raw cells are strings, and strings order '10' before '9', so the
    # range has to be computed on the converted numbers.
    numbers = [convert(value) for value in values]

    # NaN (the only value for which x != x) never constrains the range
    numbers = [x for x in numbers if x == x]
    if not numbers:
        return default

    low, high = min(numbers), max(numbers)
    for candidate in candidates:
        info = limits(candidate)
        if low >= info.min and high <= info.max:
            return candidate

    return default


def read(self, filename, definition=3, verbose=True, smart_typing=False):
    '''
    Read a table from a IPAC file

    Required Arguments:

        *filename*: [ string ]
            The IPAC file to read the table from

    Optional Keyword Arguments:

        *definition*: [ 1 | 2 | 3 ]

            The definition to use to read IPAC tables:

            1: any character below a pipe symbol belongs to the
               column on the left, and any characters below the
               first pipe symbol belong to the first column.
            2: any character below a pipe symbol belongs to the
               column on the right.
            3: no characters should be present below the pipe
               symbols (default).

        *verbose*: [ True | False ]
            Whether to print a warning when a null value is added or
            changed while reading the table.

        *smart_typing*: [ True | False ]

            Whether to try and save memory by using the smallest
            integer type that can contain a column. For example,
            a column containing only values between 0 and 255 can
            be stored as an unsigned 8-bit integer column, and an
            integer column containing only 0 and 1 is stored as a
            boolean column. The default is false, so that all
            integer columns are stored as 64-bit integers.

    Raises an Exception if *definition* is invalid, if the column header
    is missing or inconsistent, or if a column contains 'null' although a
    different null value was declared for it.
    '''

    if definition not in (1, 2, 3):
        raise Exception("definition should be one of 1/2/3")

    self.reset()

    names = None
    types = None
    pipes = []
    units = {}
    nulls = {}
    array = {}

    # Text mode so that lines compare against str on Python 2 and 3 alike;
    # the context manager closes the file even if parsing fails.
    with open(filename, 'r') as f:

        line = f.readline()

        # Read in comments and keywords (lines starting with a backslash)
        while line[0:1] == '\\':

            if line[1:2] == ' ' or '=' not in line:  # comment
                self.add_comment(line[1:])
            else:  # keyword
                pos = line.index('=')
                key, value = line[1:pos], line[pos + 1:]
                value = value.replace("'", "").replace('"', '')
                self.add_keyword(key.strip(), value.strip())

            line = f.readline()

        # Column headers (lines starting with a pipe symbol)
        header_row = 0

        while line[0:1] == "|":

            if header_row == 0:  # Column names

                line = line.replace('-', ' ').strip()

                # The pipe positions define the column boundaries that
                # are used to slice every data line
                pipes = [i for i, c in enumerate(line) if c == '|']
                names = _ipac_header_cells(line)

            elif header_row == 1:  # Data types

                line = line.replace('-', ' ').strip()
                types = dict(zip(names, _ipac_header_cells(line)))

            elif header_row == 2:  # Units

                units = dict(zip(names, _ipac_header_cells(line)))

            else:  # Null values

                nulls = dict(zip(names, _ipac_header_cells(line)))

            line = f.readline()
            header_row = header_row + 1

        if names is None:
            raise Exception("No column header found in IPAC table")

        if types is None:
            raise Exception("No data type line found in IPAC table header")

        if len(pipes) != len(names) + 1:
            raise Exception("An error occured while reading the IPAC table")

        if len(units) == 0:
            for name in names:
                units[name] = ''

        if len(nulls) == 0:
            nulls_given = False
            for name in names:
                nulls[name] = ''
        else:
            nulls_given = True

        # Pre-compute numpy column types
        numpy_types = {}
        for name in names:
            numpy_types[name] = type_dict[types[name]]

        # Data: one list of raw (stripped) cell strings per column
        for name in names:
            array[name] = []

        last_column = len(names) - 1

        # The data section ends at the first blank line or at end of file
        while line.strip() != '':

            for i, name in enumerate(names):

                first, last = pipes[i] + 1, pipes[i + 1]

                if definition == 1:
                    last = last + 1
                    if first == 1:
                        first = 0
                elif definition == 2:
                    first = first - 1

                # The last column extends to the end of the line
                if i == last_column:
                    item = line[first:].strip()
                else:
                    item = line[first:last].strip()

                if item.lower() == 'null' and nulls[name] != 'null':
                    if nulls[name] == '':
                        if verbose:
                            print("WARNING: found unexpected 'null' value. Setting null value for column " + name + " to 'null'")
                        nulls[name] = 'null'
                        nulls_given = True
                    else:
                        raise Exception("null value for column " + name + " is set to " + str(nulls[name]) + " but found value 'null'")

                array[name].append(item)

            line = f.readline()

    # Check that null values are of the correct type
    if nulls_given:
        for name in names:
            try:
                nulls[name] = numpy_types[name](nulls[name])
            except (ValueError, TypeError, OverflowError):
                # The declared null value cannot be represented with the
                # column type: substitute the default invalid value, both
                # in the data and as the column null value.
                n = invalid[numpy_types[name]]
                for i, item in enumerate(array[name]):
                    if item == nulls[name]:
                        array[name][i] = n
                if verbose:
                    if len(str(nulls[name]).strip()) == 0:
                        print("WARNING: empty null value for column " + name + " set to " + str(n))
                    else:
                        print("WARNING: null value for column " + name + " changed from " + str(nulls[name]) + " to " + str(n))
                nulls[name] = n

    # Convert to numpy arrays
    for name in names:

        default_dtype = type_dict[types[name]]

        if smart_typing:
            dtype = _ipac_smart_dtype(array[name], types[name], default_dtype)
        else:
            dtype = default_dtype

        # If max integer does not fit in a signed 64-bit integer, use uint64
        if dtype == np.int64 and array[name]:
            if max(int(x) for x in array[name]) > np.iinfo(np.int64).max:
                dtype = np.uint64
                warnings.warn("using type uint64 for column %s" % name)

        column = np.array(array[name], dtype=dtype)

        # Integer columns holding only 0 and 1 are stored as booleans.
        # Restricted to integer columns so that fractional values of a
        # float column in [0, 1] are never collapsed to False.
        if smart_typing and column.size > 0 and column.dtype.kind in 'iu':
            if column.min() >= 0 and column.max() <= 1:
                column = column == 1

        array[name] = column

        if self._masked:
            self.add_column(name, array[name],
                            mask=smart_mask(array[name], nulls[name]),
                            unit=units[name], fill=nulls[name])
        else:
            self.add_column(name, array[name],
                            null=nulls[name], unit=units[name])
def read(self, filename, hdu=None, memmap=False, verbose=True):
    '''
    Read a table from a FITS file

    Required Arguments:

        *filename*: [ string ]
            The FITS file to read the table from

    Optional Keyword Arguments:

        *hdu*: [ integer ]
            The HDU to read from the FITS file (this is only required
            if there are more than one table in the FITS file)

        *memmap*: [ bool ]
            Whether PyFITS should use memory mapping

        *verbose*: [ bool ]
            Accepted for interface compatibility; not used by this reader.

    Raises an Exception if the file contains no tables, and a
    TableException if it contains several and no *hdu* was given.
    '''

    _check_pyfits_installed()

    self.reset()

    # If no hdu is requested, check that there is only one table. Any
    # falsy value (None, 0, '') triggers this auto-detection.
    if not hdu:
        tables = _list_tables(filename)
        if len(tables) == 0:
            raise Exception("No tables in file")
        elif len(tables) == 1:
            hdu = list(tables.keys())[0]
        else:
            raise TableException(tables, 'hdu')

    # FITS stores some integer types with an offset: format code ->
    # (TZERO offset, type wide enough to apply the offset, final type)
    offset_types = {
        'B': (-128, np.int16, np.int8),
        'I': (-np.iinfo(np.int16).min, np.int32, np.uint16),
        'J': (-np.iinfo(np.int32).min, np.int64, np.uint32),
    }

    hdulist = pyfits.open(filename, memmap=memmap)

    # Make sure the file is closed even if reading the table fails
    try:

        table_hdu = hdulist[hdu]

        table = table_hdu.data
        header = table_hdu.header
        columns = table_hdu.columns

        # Construct dtype for table
        dtype = []

        for i, name in enumerate(table.dtype.names):

            col_type = table.dtype[name]
            if col_type.subdtype:
                col_type, shape = col_type.subdtype
            else:
                shape = ()

            # Get actual FITS format and zero-point
            bzero = columns[i].bzero

            # Remove numbers from format, to find just type
            code = columns[i].format.strip("1234567890.")

            if code in ('I', 'F', 'E', 'D') and col_type.kind == 'S':
                # Numeric column that is stored as a byte string
                if code == 'I':
                    col_type = np.int64
                elif code in ('F', 'E'):
                    col_type = np.float32
                else:
                    col_type = np.float64
            elif code == 'X' and col_type.type == np.uint8:
                # Bit column: every stored byte holds eight flags
                col_type = np.bool_
                if len(shape) == 1:
                    shape = (shape[0] * 8,)
            elif code == 'L':
                col_type = np.bool_

            if bzero and code in offset_types and bzero == offset_types[code][0]:
                col_type = offset_types[code][2]

            dtype.append((name, col_type, shape))

        dtype = np.dtype(dtype)

        if self._masked:
            self._setup_table(len(table), dtype, units=columns.units)
        else:
            self._setup_table(len(table), dtype, units=columns.units,
                              nulls=columns.nulls)

        # Populate the table
        for i, name in enumerate(columns.names):

            # NOTE(review): the format code is the last character here but
            # the digit-stripped format in the dtype loop above; the two
            # differ for formats ending in a digit - confirm intended.
            code, bzero = columns[i].format[-1], columns[i].bzero

            if bzero and code in offset_types and bzero == offset_types[code][0]:
                # Apply the offset in a wider type, then narrow the result
                wide, final = offset_types[code][1], offset_types[code][2]
                data = pyfits.rec.recarray.field(table, i)
                data = (data.astype(wide) + bzero).astype(final)
            else:
                data = table.field(name)

            self.data[name][:] = data[:]

            if self._masked:
                if columns.nulls[i] == 'NAN.0':
                    null = np.nan
                elif columns.nulls[i] == 'INF.0':
                    null = np.inf
                else:
                    null = columns.nulls[i]
                self.data[name].mask = smart_mask(data, null)
                self.data[name].set_fill_value(null)

        # Keep every keyword that neither describes a column nor belongs
        # to the standard keys
        for key in header.keys():
            if key[:4] not in ['TFOR', 'TDIS', 'TDIM', 'TTYP', 'TUNI'] and \
                    key not in standard_keys:
                self.add_keyword(key, header[key])

        for comment in header.get_comment():
            if isinstance(comment, pyfits.Card):
                self.add_comment(comment.value)
            else:
                self.add_comment(comment)

        if table_hdu.name:
            self.table_name = str(table_hdu.name)

    finally:
        hdulist.close()