def xmlColorData(infile, outfile, excludeList=None):
    """xmlColorData takes a color catalog file produced by the colorCatalog
    module and marks it with the pipeline/archive protocol markup.  Must use
    at least the xmlStartCat function above to initialise the xml file and
    use the xmlEndCat function to close things up nicely.  This function is
    a variation of the xmlData function below, but designed specifically for
    the multicolor catalog produced by the colorCatalog module.  This was
    necessitated by the non-generalised nature of the multicolor catalog,
    where column headers are dependent on the nature of the data (i.e.
    different filter names).

    Usage: xmlColorData('catalogfile', 'outputfile')

    K Anderson 02-01-02
    """
    if excludeList:
        for field in excludeList:
            print "Note:", field, "being excluded from xml markup of", os.path.basename(infile)

    catalog = open(infile)
    rows = catalog.readlines()
    catalog.close()
    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the rows list.
    lastrow = rows[len(rows) - 1]
    # Strip the decimal part of the source count (e.g. "1234.0" -> "1234").
    det_sources = string.split(string.split(lastrow)[0], '.')[0]

    xmlcatalog = open(outfile, 'a')
    xmlcatalog.write("  <data Nobjects=\"%s\">\n" % det_sources)

    # The function fillHeader returns a list of tuples from the header of the
    # color catalog.  fillHeader _may_ return tuples of length 2 or 3, but in
    # the case of the color catalog only length 2 is expected.  The crux of
    # the next block is to find all occurrences of the different filter names
    # which might be present in a color catalog header, split the filter name
    # away from the field name, and mark up the field name.  This is done for
    # the general case where MAG and MAGERR columns are not necessarily
    # paired.
    format2 = '\t<field name=\"%s\">%s</field>\n'
    for f in range(len(rows)):
        if '#' in rows[f]:
            continue
        cols = string.split(rows[f])
        source = cols[0]
        xmlcatalog.write("    <object number=\"" + source + "\">\n")
        for j in range(1, len(headerList)):
            param = cols[j]
            colId = headerList[j]
            if excludeList and colId[1] in excludeList:
                continue
            if string.find(colId[1], "_BPZ") != -1 or string.find(colId[1], "_APER_") != -1:
                # The length of the split filter line will be based on this format:
                #   single filter,   len == 6: HST_ACS_HRC_F250W_APER_CORR
                #   single filter,   len == 6: HST_ACS_HRC_F250W_MAG_BPZ
                #   crossed filters, len == 7: HST_ACS_HRC_POL120UV_F435W_MAGERR_BPZ
                # Raise an exception if len is not one of these values,
                # because if it isn't, something is horribly wrong.
                filterId = string.split(colId[1], "_")
                if len(filterId) == 6:
                    filterName = filterId[0] + "_" + filterId[1] + "_" + filterId[2] + "_" + filterId[3]
                    fieldName = filterId[4] + "_" + filterId[5]
                    xmlcatalog.write(format2 % (fieldName, param))
                elif len(filterId) == 7:
                    filterName = filterId[0] + "_" + filterId[1] + "_" + filterId[2] + "_" + filterId[3] + "_" + filterId[4]
                    fieldName = filterId[5] + "_" + filterId[6]
                    xmlcatalog.write(format2 % (fieldName, param))
                else:
                    raise SyntaxError, "Error: unexpected filter syntax in multicolor catalog header."
            else:
                column_no, name = colId
                xmlcatalog.write(format2 % (name, param))
        xmlcatalog.write("    </object>\n")
    xmlcatalog.write("  </data>\n")
    xmlcatalog.close()
    return
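# A minimal usage sketch for xmlColorData.  The file names here are
# hypothetical, and the xmlStartCat/xmlEndCat signatures are assumed (they
# are defined elsewhere in this module); the point is only the required
# call order:
#
#   xmlStartCat('multicolor.xml')                     # initialise the xml file
#   xmlColorData('multicolor.cat', 'multicolor.xml',
#                excludeList=['NUMBER'])              # mark up the catalog data
#   xmlEndCat('multicolor.xml')                       # close things up nicely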
def xmlData(infile, outfile, excludeList=None):
    """xmlData takes a SExtractor catalog file and marks it with the
    pipeline/archive protocol markup.  Must use at least the xmlStartCat
    function above to initialise the xml file and use the xmlEndCat function
    to close things up nicely.  Caller can pass a list of fields to exclude
    from the xml markup if that is desired; if not, the list is None.

    Usage: xmlData('catalog', 'outputfile'[, excludeList=['blah']])

    where catalog is the catalog's filename and outputfile is just that.
    """
    if excludeList:
        for field in excludeList:
            print "Note:", field, "being excluded from xml markup of", os.path.basename(infile)

    catalog = open(infile)
    rows = catalog.readlines()
    catalog.close()
    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the rows list.
    lastrow = rows[len(rows) - 1]
    # Strip the decimal part of the source count (e.g. "1234.0" -> "1234").
    det_sources = string.split(string.split(lastrow)[0], '.')[0]

    xmlcatalog = open(outfile, 'a')
    xmlcatalog.write("  <data Nobjects=\"%s\">\n" % det_sources)

    # The function fillHeader returns a list of tuples from the header of the
    # SExtractor catalog.  These tuples may be of length 2 or 3, depending on
    # whether a particular parameter has multiple occurrences (e.g.
    # FLUX_APER).  The formats below handle those two cases when writing the
    # xml catalog.
    format2 = '\t<field name=\"%s\">%s</field>\n'
    format3 = '\t<field name=\"%s\" number=\"%s\">%s</field>\n'
    for f in range(len(rows)):
        if '#' in rows[f]:
            continue
        cols = string.split(rows[f])
        source = cols[0]
        xmlcatalog.write("    <object number=\"" + source + "\">\n")
        for j in range(1, len(headerList)):
            param = cols[j]
            if param.lower() == "nan":
                param = "-9999"
                print "Found a nan in catalog", os.path.basename(infile)
                print "set to -9999 in the markup."
            if len(headerList[j]) == 2:
                column_no, name = headerList[j]
                if excludeList and name in excludeList:
                    continue
                xmlcatalog.write(format2 % (name, param))
            elif len(headerList[j]) == 3:
                column_no, name, number = headerList[j]
                if excludeList and name in excludeList:
                    continue
                xmlcatalog.write(format3 % (name, str(number), param))
            else:
                raise TypeError, "Incompatible tuple size"
        # In the case where some parameter has multiple columns of output and
        # is the last parameter listed, this loop attaches that last parameter
        # name to all remaining values.  This behaviour is seen when the
        # parameter VIGNET is the last parameter listed and all subsequent
        # columns in the SExtractor catalog are elements of the VIGNET array.
        for j in range(len(headerList), len(cols)):
            param = cols[j]
            xmlcatalog.write(format2 % (name, param))
        xmlcatalog.write("    </object>\n")
    xmlcatalog.write("  </data>\n")
    xmlcatalog.close()
    return
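# Illustrative output, derived from format2/format3 above (the values are
# made up): for a header defining (2, 'X_IMAGE') and (14, 'FLUX_APER', 1),
# each object row is marked up roughly as
#
#     <object number="17">
#         <field name="X_IMAGE">123.456</field>
#         <field name="FLUX_APER" number="1">0.0042</field>
#     </object>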
def _magFix(self, catalogFile):
    """This private method receives a path to a catalog file and sifts
    through the MAGERR fields looking for values >= 10.  It sets the
    corresponding MAG field to -99 and sets that object's MAGERR field
    to 0.0.  catalogFile is a path, not a file object.
    """
    # fillHeader will return a list of tuples which looks like
    #
    # [(1, 'NUMBER'),
    #  (2, 'X_IMAGE'),
    #  (3, 'Y_IMAGE'),
    #  ...
    #  (12, 'MAG_ISOCOR'),
    #  (13, 'MAGERR_ISOCOR'),
    #  (14, 'FLUX_APER', 1),
    #  (15, 'FLUX_APER', 2),
    #  (16, 'FLUX_APER', 3),
    #  ...
    # ]
    #
    # The tuples are either of length 2 or 3.  If len is 3, the 3rd item of
    # the tuple is the nth occurrence of that column identifier.  This occurs
    # on those columns of MAGs and MAGERRs for a series of increasingly
    # larger apertures.
    #
    # newFieldList will be a list of Numeric arrays containing the columns of
    # the catalog.  This list will contain fields which have not been
    # altered, i.e. all fields other than MAG_* and MAGERR_*, and the new MAG
    # and MAGERR fields which have been corrected.  Once the list is
    # complete, it is tuple-ized and sent to the tableio put_data function.

    newFieldList = []
    newMagsList = []
    newMagErrsList = []
    newMagHeaders = []
    newMagErrHeaders = []
    newHeaders = []
    magCols = []
    magErrCols = []
    selectSet = fillHeader(catalogFile)

    print "Searching catalog for required columns, MAG, MAGERR"
    for i in range(len(selectSet)):
        if len(selectSet[i]) == 2:
            column, name = selectSet[i]
            paramNames = name.split("_")
            if "MAG" in paramNames:
                magCols.append((column, name))
            elif "MAGERR" in paramNames:
                magErrCols.append((column, name))
            else:
                oldField = tableio.get_data(catalogFile, (column - 1))
                newFieldList.append(oldField)
                newHeaders.append(name)
        else:
            column, name, id = selectSet[i]
            paramNames = name.split("_")
            if "MAG" in paramNames:
                magCols.append((column, name, id))
            elif "MAGERR" in paramNames:
                magErrCols.append((column, name, id))
            else:
                oldField = tableio.get_data(catalogFile, (column - 1))
                newFieldList.append(oldField)
                newHeaders.append(name)

    # We now have
    #
    #   catalog field --> list
    #   --------------------------------
    #   MAG_*         --> magCols
    #   MAGERR_*      --> magErrCols
    #
    # The algorithm steps through the magErrCols columns, extracting those
    # fields via get_data as Numeric arrays.  The matching MAG columns are
    # slurped as well.  We search the MAGERR arrays looking for values >= 10,
    # then mark those MAGs as -99.0 and the matching MAGERRs as 0.0.
    # See Bugzilla bug #2700.
    for item in magErrCols:
        magErrAperId = None
        # item may be of len 2 or 3
        if len(item) == 2:
            magErrColId, magErrColName = item
        else:
            magErrColId, magErrColName, magErrAperId = item
        magErrKind = magErrColName.split("_")[1]  # ISO, ISOCOR, etc.

        print "\n\nMAG type:", magErrKind
        if magErrAperId:
            print magErrColName, "Aper id is", magErrAperId
        print "Getting\t", magErrColName, "\tfield", magErrColId

        # MAGERR array:
        magErrs = tableio.get_data(catalogFile, magErrColId - 1)

        matchingMagColName = None
        matchingMagColId = None

        #----------------------- Search for matching MAG_* field -----------------------#

        for magitems in magCols:
            # We know that magErrColName is a MAGERR, and if magErrAperId is
            # true then the tuple is of len 3, i.e. a MAGERR_APER field.  We
            # look for the matching MAG_APER field id: 1, 2, 3... etc.
            if len(magitems) == 3:
                magColId, magColName, magAperId = magitems
                if magColName == "MAG_" + magErrKind:
                    matchingMagColName = magColName
                    #print "Found matching field type:", magColName, "in field", magColId
                    if magAperId == magErrAperId:
                        print "Found matching aperture id."
                        print "MAG_APER id: ", magAperId, "MAGERR_APER id: ", magErrAperId
                        matchingMagColId = magColId
                        matchingMags = tableio.get_data(catalogFile, magColId - 1)
                        break
                else:
                    continue
            else:
                magColId, magColName = magitems
                if magColName == "MAG_" + magErrKind:
                    print "Found matching field type:", magColName, "in field", magColId
                    matchingMagColName = magColName
                    matchingMagColId = magColId
                    matchingMags = tableio.get_data(catalogFile, magColId - 1)
                    break
                else:
                    continue

        #--------------------------------------------------------------------------------#

        print "  MAG err field:", magErrColName, magErrColId
        print "  Mag field:", matchingMagColName, matchingMagColId

        # Now the grunt work on the arrays, magErrs and matchingMags.
        #
        # update: flagging all MAGs as -99 when the corresponding MAGERR >= 10
        # introduced a bug which unintentionally reset the magnitudes
        # SExtractor had flagged with a MAG = 99.0 and a MAGERR = 99.0.
        # This now checks for a MAGERR of 99 and does not reset the MAG value
        # if MAGERR = 99.0, but does for all other MAGERRs >= 10.0.
        badMagErrs1 = Numeric.where(magErrs >= 10, 1, 0)
        badMagErrs2 = Numeric.where(magErrs != 99.0, 1, 0)
        badMagErrs = badMagErrs1 * badMagErrs2
        del badMagErrs1, badMagErrs2
        newMags = Numeric.where(badMagErrs, -99.0, matchingMags)
        newMagErrs = Numeric.where(badMagErrs, 0.0, magErrs)

        newMagsList.append(newMags)
        newMagHeaders.append(matchingMagColName)
        newMagErrsList.append(newMagErrs)
        newMagErrHeaders.append(magErrColName)

    # Concatenate the lists.  This is done to preserve the MAG_APER and
    # MAGERR_APER grouping of the original SExtractor catalog.
    newFieldList = newFieldList + newMagsList
    newFieldList = newFieldList + newMagErrsList
    newHeaders = newHeaders + newMagHeaders
    newHeaders = newHeaders + newMagErrHeaders

    newVariables = tuple(newFieldList)

    # Rename the old catalog file as catalogFile.old
    os.rename(catalogFile, catalogFile + ".old")
    self.outputList[os.path.basename(catalogFile) + ".old"] = [os.path.basename(catalogFile)]

    fob = open(catalogFile, 'w')
    fob.write("## " + ptime() + "\n")
    fob.write("## " + self.modName + " catalog regenerated by _magFix method.\n")
    fob.write('## (This file was generated automatically by the ACS Pipeline.)\n##\n')
    fob.write("## This catalog has been photometrically corrected to remove\n")
    fob.write("## 'bad' magnitude values.\n")
    fob.write("##\n")
    for i in range(len(newHeaders)):
        fob.write("# " + str(i + 1) + "\t" + newHeaders[i] + "\n")
    fob.close()
    tableio.put_data(catalogFile, newVariables, append="yes")
    return
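# A standalone sketch of the masking logic used above (the values are made
# up; Numeric is the ancestor of NumPy, and Numeric.where behaves like
# numpy.where):
#
#   magErrs      = Numeric.array([0.02, 12.5, 99.0, 33.1])
#   matchingMags = Numeric.array([21.1, 22.3, 99.0, 24.8])
#   bad = Numeric.where(magErrs >= 10, 1, 0) * Numeric.where(magErrs != 99.0, 1, 0)
#   # bad == [0, 1, 0, 1]: flags 12.5 and 33.1 but leaves the 99.0 sentinel alone
#   Numeric.where(bad, -99.0, matchingMags)   # -> [21.1, -99.0, 99.0, 24.8]
#   Numeric.where(bad, 0.0, magErrs)          # -> [0.02,   0.0, 99.0,  0.0]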
def splice(self):
    """Method splices the photo-z catalog (.bpz) file and the multicolor.cat
    file to produce a final photometric redshift catalog.  Raises an
    exception if one or both files cannot be found.  All these files will be
    in the dir defined by self.obsCats.
    """
    self.bpzCat = os.path.join(self.obsCats, 'bpz.cat')
    if not os.path.exists(self.colorCat):
        raise IOError, "Multicolor catalog file not found."
    elif not os.path.exists(self.bpzCat):
        raise IOError, "BPZ catalog file not found."

    # Use the fillHeader function to get a list of header lines from each catalog.
    bpzHeaders = fillHeader(self.bpzCat)
    colorHeaders = fillHeader(self.colorCat)
    allH = bpzHeaders + colorHeaders

    # Delete the extra element containing the 'NUMBER' column.
    for i in range(len(allH)):
        col, name = allH[i]
        if name == 'NUMBER':
            del allH[i]
            break

    # Renumber the columns via a counter.
    for i in range(len(allH)):
        col, name = allH[i]
        allH[i] = (i + 1, name)

    # Open the new catalog file and write these headers.
    newCat = open(os.path.join(self.obsCats, 'final_photometry.cat'), 'w')
    newCat.write('## Photometry Catalog for Observation: ' + self.obsName + '\n')
    newCat.write('## Generated by the ACS Pipeline, ' + ptime() + '\n')
    newCat.write('##\n')

    f1 = open(self.bpzCat)
    while 1:
        line = f1.readline()
        fields = string.split(line)
        if fields[0] == '##':
            newCat.write(line)
        else:
            break
    f1.close()
    del f1

    for col, name in allH:
        newCat.write('# ' + str(col) + '\t' + name + '\n')

    # Slurp up the data from each catalog.
    cat1 = open(self.bpzCat).readlines()
    cat2 = open(self.colorCat).readlines()

    # Grab just the data lines.
    cat1Data = []
    cat2Data = []
    for line in cat1:
        if '#' in line:
            pass
        else:
            cat1Data.append(string.rstrip(line))

    # Delete the extra field 'NUMBER' from the colorCat data, as was done
    # (above) for the header.
    for line in cat2:
        if '#' in line:
            pass
        else:
            fields = string.split(string.rstrip(line))
            del fields[0]
            newline = string.joinfields(fields)
            cat2Data.append(newline)

    # Write the concatenated lines to the new catalog.
    if len(cat1Data) != len(cat2Data):
        raise IndexError, "Catalog length mismatch."
    for i in range(len(cat1Data)):
        newCat.write(cat1Data[i] + ' ' + cat2Data[i] + '\n')
    newCat.close()
    return
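# Sketch of the resulting final_photometry.cat header (the column names below
# are illustrative; the actual names come from the two input catalogs): the
# BPZ columns lead, followed by the multicolor columns minus the duplicate
# NUMBER field, all renumbered consecutively:
#
#   ## Photometry Catalog for Observation: myObservation
#   # 1   ID
#   # 2   Z_B
#   # ...
#   # N   HST_ACS_HRC_F250W_MAG_BPZ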
def _hackit(self, cat, keep_apertures=[1, 2, 3]):
    """Hack the detectionCatalog.cat file to take out a bunch of the
    aperture data.  Default is to keep only the first three apertures in the
    final catalog, but the caller can change this by passing a
    keep_apertures list (these are aperture numbers and *not* radii).  This
    will hack the columns indicated by

        MAG_APER
        MAGERR_APER
        FLUX_APER
        FLUXERR_APER
    """
    dir, old_file = os.path.split(cat)
    headerList = pUtil.fillHeader(cat)  # this returns a list of the catalog header.

    # Go through the header and find the columns to keep.  We are looking for
    #   ('FLUX_APER', 1)
    #   ('FLUX_APER', 2)
    #   ('FLUX_APER', 3)
    #   ('FLUXERR_APER', 1)
    #   ('FLUXERR_APER', 2)
    #   ('FLUXERR_APER', 3)
    #   ('MAG_APER', 1)
    #   ('MAG_APER', 2)
    #   ('MAG_APER', 3)
    #   ('MAGERR_APER', 1)
    #   ('MAGERR_APER', 2)
    #   ('MAGERR_APER', 3)
    newheader = []
    for i in headerList:
        if len(i) == 2:
            newheader.append(i)
        else:
            if i[2] not in keep_apertures:
                continue
            else:
                newheader.append(i)

    cols = []
    for i in newheader:
        cols.append(i[0] - 1)

    new_rows = []
    for row in open(cat).readlines():
        if '#' in row:
            continue
        fields = row.split()
        arow = ''
        for column in cols:
            arow += ' ' + fields[column]
        new_rows.append(arow)

    # OK, we have the newheader and the new data.  We need to renumber the
    # columns in the header; the newheader list has the old catalog's column
    # identifiers and that needs to get fixed.
    new_newheader = []
    for i in range(len(newheader)):
        if len(newheader[i]) == 2:
            new_newheader.append((i + 1, newheader[i][1]))
        else:
            new_newheader.append((i + 1, newheader[i][1], newheader[i][2]))

    # Now we just overwrite the original detectionImage.cat file
    # (well, at least whatever was passed to this func, anyway).
    fob = open(cat, 'w')
    self.logfile.write("private method _hackit, trimming aperture parameters")
    self.logfile.write("_hackit overwriting detectionImage.cat file.")
    fob.write("## Date: " + pUtil.ptime() + "\n")
    fob.write("## This file has been modified from its original form by the WFP Pipeline.\n")
    fob.write("## Some aperture fields have been removed.\n")
    fob.write("## This file written by the WFP Pipeline. Do not edit.\n")
    for item in new_newheader:
        fob.write('# ' + str(item[0]) + '\t' + str(item[1]) + '\n')
    for row in new_rows:
        fob.write(row + '\n')
    fob.close()
    return
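# For example (illustrative header entries and path): with the default
# keep_apertures=[1, 2, 3], an input entry such as (18, 'MAG_APER', 4) is
# dropped, while (15, 'MAG_APER', 1) survives and is renumbered by position,
# e.g. to (9, 'MAG_APER', 1); two-tuples like (2, 'X_IMAGE') always survive.
#
#   self._hackit('/path/to/detectionImage.cat')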