def update_holding_list(new_entry, processed_list):
    """
    update <house_keeping>/keep_entry list
    Input:  new_entry      --- a list of obsids submitted for processing
            processed_list --- a list of obsids actually processed
    Output: <house_keeping>/keep_entry list (overwritten; emptied when
            every obsid was processed)
    """
#
#--- find whether any of the obsids were not processed
#
    missing = mcf.find_missing_elem(new_entry, processed_list)

    ofile = house_keeping + 'keep_entry'        # avoid shadowing builtin 'file'
#
#--- the file is always (re)opened in 'w' mode so that a fully processed
#--- run truncates keep_entry to empty; 'with' guarantees the handle closes
#
    with open(ofile, 'w') as f:
        if len(missing) > 0:
#
#--- if so, print them out (one obsid per line, duplicates removed)
#
            missing = mcf.removeDuplicate(missing, chk=0)
            for ent in missing:
                f.write(ent)
                f.write('\n')
def get_amp_avg_data(new_entry):
    """
    extract amp_avg information from a fits file for a given obsid
    Input:  new_entry      --- a list of obsids
    Output: amp_data_list  --- a list of avg_amp lines kept in the house_keeping dir
                               (format: 2013-10-27T06:11:52 0.218615253807107 53275)
            processed_list --- a list of obsids which were actually used to
                               generate avg_amp
    """
#
#--- remove duplicated entries of obsid
#
    new_entry = mcf.removeDuplicate(new_entry, chk=0)

    processed_list = []
    amp_data_list = []
    for obsid in new_entry:
#
#--- extract fits file(s)
#
        fits_list = extract_stat_fits_file(obsid, out_dir=temp_dir)
        for fits in fits_list:
#
#--- read header entry; bug fix: str.strip() returns a new string ---
#--- the old code called date.strip() and discarded the result
#
            dout = pyfits.open(fits)
            date = dout[0].header['DATE-OBS'].strip()
            dout.close()                     # close the HDU list (was leaked)
#
#--- extract column data for ccd_id and drop_amp
#
            data = pyfits.getdata(fits, 1)
            ccdid = data.field('ccd_id')
            drop_amp = data.field('drop_amp')

            amp_data = []
            total = 0                        # renamed: 'sum' shadowed the builtin
            for i in range(0, len(ccdid)):
#
#--- amp data is computed only from ccd 7 drop_amp
#
                if int(ccdid[i]) == 7:
                    val = float(drop_amp[i])
                    amp_data.append(val)
                    total += val

            if len(amp_data) > 0:
#
#--- 0.00323 is given by cgrant (03/07/05)
#
                norm_avg = 0.00323 * total / float(len(amp_data))
                line = date + '\t' + str(norm_avg) + '\t' + str(obsid) + '\n'
            else:
#
#--- no ccd 7 rows: record a sentinel value instead of an average
#
                line = date + '\t' + '999999' + '\t' + str(obsid) + '\n'

            processed_list.append(obsid)
            amp_data_list.append(line)

    return [processed_list, amp_data_list]
def cleanUp(cdir):
    """
    sort and remove duplicated lines in all files in a given data directory
    Input:  cdir       --- directory name
    Output: cdir/files --- cleaned up files
    """
    if os.listdir(cdir) != []:
#
#--- list the directory contents into the scratch file, then read it back
#
        cmd = 'ls ' + cdir + '/* > ' + zspace
        os.system(cmd)
        data = mcf.readFile(zspace)
        mcf.rm_file(zspace)

        for ent in data:                    # renamed: 'file' shadowed the builtin
#
#--- avoid html and png files: skip any name containing a '.'
#--- (raw string so '\.' is a literal dot, not an invalid escape)
#
            m = re.search(r'\.', ent)
            if m is None:
                mcf.removeDuplicate(ent, chk=1, dosort=1)
#
#--- NOTE(review): this is a verbatim duplicate of the cleanUp defined just
#--- above; at import time this second definition silently replaces the first.
#--- Kept (with the same fixes) to preserve file behavior; one copy should be
#--- deleted once callers are confirmed.
#
def cleanUp(cdir):
    """
    sort and remove duplicated lines in all files in a given data directory
    Input:  cdir       --- directory name
    Output: cdir/files --- cleaned up files
    """
    if os.listdir(cdir) != []:
#
#--- list the directory contents into the scratch file, then read it back
#
        cmd = 'ls ' + cdir + '/* > ' + zspace
        os.system(cmd)
        data = mcf.readFile(zspace)
        mcf.rm_file(zspace)

        for ent in data:                    # renamed: 'file' shadowed the builtin
#
#--- avoid html and png files: skip any name containing a '.'
#--- (raw string so '\.' is a literal dot, not an invalid escape)
#
            m = re.search(r'\.', ent)
            if m is None:
                mcf.removeDuplicate(ent, chk=1, dosort=1)
def clean_cti_data_table(dir):
    """
    remove data points which are extreme outliers and then clean up the
    output data tables
    Input:  dir --- the directory (under data_dir) where the data files are kept
    Output: updated data files in the directory <dir>
            <dir>/dropped_data    --- log of the dropped data lines
            <dir>/bad_data_obsid  --- list of obsids of the dropped data
    """
    dropped = data_dir + dir + '/dropped_data'
    dropped_obsids = []

    with open(dropped, 'w') as fo:
        for elm in elm_list:
            fo.write('ELM: ' + elm + '\n')

            for ccd in range(0, 10):
#
#--- drop_factor sets the boundary of the outlier: how many sigma away?
#--- (back-illuminated ccds 5 and 7 get a looser cut)
#
                if ccd == 5 or ccd == 7:
                    drop_factor = 5.0
                else:
                    drop_factor = 4.0
#
#--- check that the input file exists and is not empty
#
                dname = data_dir + dir + '/' + elm + '_ccd' + str(ccd)
                chk = mcf.isFileEmpty(dname)
                if chk > 0:
                    fo.write('CCD: ' + str(ccd) + '\n')

                    with open(dname, 'r') as f:
                        data = [line.strip() for line in f.readlines()]
#
#--- separate data into separate array data sets:
#--- columns 0-3 are the four quad cti values, 8 is the time, 10 the obsid
#
                    dcolumns = separate_data(data)
                    cti = ['' for x in range(4)]
                    cti[0] = dcolumns[0]
                    cti[1] = dcolumns[1]
                    cti[2] = dcolumns[2]
                    cti[3] = dcolumns[3]
                    obsid = dcolumns[10]

                    dom = []
                    for ent in dcolumns[8]:
                        time_list = tcnv.dateFormatConAll(ent)
                        dom.append(time_list[7])
#
#--- go around quads
#
                    drop_list = []
                    for i in range(0, 4):
                        fo.write("QUAD" + str(i) + '\n')
#
#--- fit a linear line
#
                        (intc, slope) = linear_fit(dom, cti[i])
#
#--- compute deviations from the fitted line
#
                        sq_sum = 0          # renamed: 'sum' shadowed the builtin
                        diff_save = []
                        for j in range(0, len(dom)):
                            diff = float(cti[i][j]) - (intc + slope * float(dom[j]))
                            diff_save.append(diff)
                            sq_sum += diff * diff
                        sigma = math.sqrt(sq_sum / len(dom))
#
#--- find outliers; NOTE(review): only positive deviations are cut
#--- (one-sided test) --- confirm this asymmetry is intended
#
                        out_val = drop_factor * sigma
                        for j in range(0, len(dom)):
                            if diff_save[j] > out_val:
                                drop_list.append(j)
                                fo.write(data[j])
                                fo.write('\n')
#
#--- clean up the list; removing duplicated entries
#
                    drop_list = mcf.removeDuplicate(drop_list, chk=0)
#
#--- keep only the rows whose index is not in drop_list
#
                    cleaned_data = []
                    for i in range(0, len(dom)):
                        chk = 0
                        for comp in drop_list:
                            if i == comp:
                                chk = 1
                                break
                        if chk == 0:
                            cleaned_data.append(data[i])

                    cleaned_data = mcf.removeDuplicate(cleaned_data, chk=0)

                    for ent in drop_list:
                        dropped_obsids.append(obsid[ent])
#
#--- overwrite the data file with the cleaned rows
#
                    with open(dname, 'w') as f:
                        for ent in cleaned_data:
                            f.write(ent)
                            f.write('\n')
#
#--- record the obsids of the dropped rows
#--- (bug fix: this handle was never closed in the original)
#
    dropped_obsids = mcf.removeDuplicate(dropped_obsids, chk=0)
    out = data_dir + dir + '/bad_data_obsid'
    with open(out, 'w') as f:
        for ent in dropped_obsids:
            f.write(ent)
            f.write('\n')
def cleanup_amp_list():
    """
    remove duplicated obsid entries: keep the newest entry only
    Input:  read from: <house_keeping>/amp_avg_list
    Output: updated <house_keeping>/amp_avg_list
    """
    ifile = house_keeping + 'amp_avg_list'  # avoid shadowing builtin 'file'
    with open(ifile, 'r') as f:
        data = [line.strip() for line in f.readlines()]
#
#--- nothing to clean in an empty file (also avoids obsidlist[0] IndexError)
#
    if len(data) == 0:
        return
#
#--- reverse the list so that we can check from the newest entry
#
    data.reverse()
#
#--- find out which obsids are listed multiple times
#--- (the obsid is the third column of each line)
#
    obsidlist = []
    for ent in data:
        atemp = re.split(r'\s+|\t+', ent)
        obsidlist.append(int(atemp[2]))

    obsidlist.sort()
    obsidmulti = []
    comp = obsidlist[0]
    for i in range(1, len(obsidlist)):
        if comp == obsidlist[i]:
            obsidmulti.append(obsidlist[i])
        else:
            comp = obsidlist[i]
#
#--- if there are multiple obsid entries, keep the newest one and remove older ones
#
    cleaned = []
    if len(obsidmulti) > 0:
        obsidmulti = mcf.removeDuplicate(obsidmulti)
#
#--- "marked" indicates whether a specific obsid is already listed;
#--- bug fix: the original assigned marked[i] before 'marked' existed
#--- (NameError whenever duplicates were found)
#
        marked = [0] * len(obsidmulti)

        for ent in data:
            atemp = re.split(r'\s+', ent)
            obsid = int(atemp[2])
            chk = 0
            for i in range(0, len(obsidmulti)):
                if (obsid == obsidmulti[i]) and (marked[i] == 0):
                    marked[i] = 1           # first (newest) occurrence: keep it
                    break
                elif (obsid == obsidmulti[i]) and (marked[i] > 0):
                    chk = 1                 # already kept a newer one: drop
                    break
            if chk == 0:
                cleaned.append(ent)
    else:
        cleaned = data
#
#--- reverse back to the original order
#
    cleaned.reverse()
#
#--- print out the cleaned list
#
    with open(ifile, 'w') as f:
        for ent in cleaned:
            f.write(ent)
            f.write('\n')