def plot(fname):
    # Plots one histogram per segment of the noise estimates (sigma) stored in an
    # evaluation file and saves the figure under histograms/.
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4  # number of sigma columns after id, is_dwarf, teff, kepmag
    f, plots = plt.subplots((n + 1) // 2, 2, sharex=False, sharey=False)
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    for j in range(0, len(sigmas)):
        sigma = sigmas[j]
        good_sigmas = []
        for i in range(0, len(sigma)):
            if sigma[i] != 'nan':
                good_sigmas.append(float(sigma[i]))
        # Keep only the lowest 80% of the values so extreme outliers do not stretch the bins
        good_sigmas = np.sort(good_sigmas)
        good_sigmas = good_sigmas[0:int(.8 * len(good_sigmas))]
        hist = np.histogram(good_sigmas, bins=175)
        ax = plots[j // 2][j % 2]
        ax.bar(hist[1][:-1], hist[0], width=hist[1][1] - hist[1][0])
        ax.set_xlabel('$\sigma_{' + str(j + 1) + '}$', fontsize=40)
        ax.set_ylabel('$n$', fontsize=40)
        ax.text(ax.get_xlim()[1] * 0.9, ax.get_ylim()[1] * 0.75,
                '$\sigma_{' + str(j + 1) + '}$', fontsize=40)
        ax.tick_params(axis='both', which='major', labelsize=20)
    f.set_size_inches(32, 20)
    f.savefig('histograms/sigma_histo_' + fname + '.png', dpi=300)

def detect(fname):
    # Flags stars whose per-segment sigmas are unusually spread out: the standard deviation
    # of each star's sigmas is compared with the median spread over all stars, and stars
    # more than 3 standard deviations away are reported.
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4
    sigmas = []
    stds = []
    ids = dl.get_column(data, 0)
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    for star in range(len(sigmas[0])):
        star_sigmas = []
        for sigma in range(0, n):
            if sigmas[sigma][star] != 'nan' and sigmas[sigma][star] != '':
                star_sigmas.append(float(sigmas[sigma][star]))
        stds.append(np.std(star_sigmas))
    stds_median = np.median(stds)
    stds_std = np.std(stds)
    bad_stars = []
    for i in range(len(stds)):
        if abs(stds[i] - stds_median) > 3 * stds_std:
            bad_stars.append(ids[i])
    out = ''
    for star in bad_stars:
        out = out + star + ','
    print fname + ' : ' + out

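# Usage sketch (assumed workflow, not part of the source): flag suspicious stars in every
# evaluation file. The 'evaluations' folder name matches paths used elsewhere in this repo;
# the directory layout itself is an assumption.
import os

for eval_file in os.listdir('evaluations'):
    detect('evaluations/' + eval_file)
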
def plot(fname):
    print fname
    data = dl.read_dat(fname, ',')
    time = dl.get_column_numerized(data, 0)
    flux = dl.get_column_numerized(data, 1)
    plt.plot(time, flux)
    plt.show()

def plot(fname):
    # Scatter plot of log10(sigma) versus Kepler magnitude for each segment. The marker shape
    # encodes the dwarf flag (d = dwarf, o = giant, x = unknown) and the color encodes Teff
    # via color_T().
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    print data[0]
    n = len(data[0]) - 4
    print (n + 1) // 2
    f, plots = plt.subplots((n + 1) // 2, 2, sharex=False, sharey=False)
    dwarf_flags = dl.get_column(data, 1)
    kepmags = dl.get_column(data, 3)
    Teffs = dl.get_column(data, 2)
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    length = len(dl.get_column(data, 0))
    for i in range(0, length):
        done = []
        for seg in range(0, n):
            done.append(seg)
            is_dwarf = dwarf_flags[i]
            if is_dwarf == '1.0' or is_dwarf == '1':
                symbol = 'd'
            elif is_dwarf == '0.0' or is_dwarf == '0':
                symbol = 'o'
            else:
                symbol = 'x'
            x = seg // 2
            y = seg % 2
            plots[x][y].set_ylabel('$\log{\sigma_{' + str(seg + 1) + '}}$', fontsize=40)
            plots[x][y].set_xlabel('Kepler band magnitude', fontsize=20)
            plots[x][y].tick_params(axis='both', which='major', labelsize=20)
            plots[x][y].grid(True)
            plots[x][y].set_xlim([8.25, 17])
            plots[x][y].set_ylim([-4.5, 0])
            plots[x][y].text(15.25, -0.5, 'Segment ' + str(seg + 1), fontsize=30)
            if kepmags[i] != 'nan' and sigmas[seg][i] != 'nan' and sigmas[seg][i] != '':
                plots[x][y].scatter(float(kepmags[i]), np.log10(float(sigmas[seg][i])),
                                    marker=symbol, color=color_T(Teffs[i]))
        print fname + ' : ' + str(done) + ' : ' + str((i * 100.0) / length) + '%'
    f.set_size_inches(32, 32)
    f.savefig(fname + '.png', dpi=300)

def plot(fname):
    # Histogram of the Kepler band magnitudes of all evaluated stars.
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    mags = dl.get_column(data, 3)
    # Drop missing values and convert the rest to float
    mags = [float(m) for m in mags if m != 'nan']
    hist = np.histogram(mags, bins=100)
    plt.bar(hist[1][:-1], hist[0], width=hist[1][1] - hist[1][0])
    plt.xlabel('Kepler band magnitude', fontsize=50)
    plt.ylabel('$n$', fontsize=50)
    plt.tick_params(axis='both', which='major', labelsize=40)
    plt.gcf().set_size_inches(32, 20)
    plt.gcf().savefig(fname.replace('evaluations/', 'histograms/mag/mag_histo_') + '.png', dpi=300)

def medians(fname):
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    medians = []
    for j in range(0, len(sigmas)):
        sigma = sigmas[j]
        good_sigmas = []
        for i in range(0, len(sigma)):
            if sigma[i] != 'nan':
                good_sigmas.append(float(sigma[i]))
        good_sigmas = np.sort(good_sigmas)
        good_sigmas = good_sigmas[0:int(.8 * len(good_sigmas))]
        medians.append(np.median(good_sigmas))
    return medians

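# Usage sketch (file names are placeholders, not from the source): print the per-segment
# median sigmas of several evaluation files so different runs can be compared at a glance.
for eval_file in ['evaluations/evaluation_of_run_a.csv',   # placeholder paths
                  'evaluations/evaluation_of_run_b.csv']:
    print eval_file + ' : ' + str(medians(eval_file))
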
import datalib as dl
import numpy as np
import matplotlib.pyplot as plt

# Plots the median sigma of each of the 5 segments of evaluation.csv.
data = dl.read_dat('evaluation.csv', ',')
sigmas = []
for sigma in range(4, 9):
    sigmas.append(dl.get_column(data, sigma))
medians = []
for sigma in sigmas:
    good_sigmas = []
    for i in range(1, len(sigma)):  # skip the header row
        # keep only positive, non-missing sigmas (so their log10 is defined)
        if sigma[i] != 'nan' and sigma[i] != '' and float(sigma[i]) > 0:
            good_sigmas.append(float(sigma[i]))
    good_sigmas = np.sort(good_sigmas)
    good_sigmas = good_sigmas[0:int(0.8 * len(good_sigmas))]
    medians.append(np.median(good_sigmas))
plt.plot([1, 2, 3, 4, 5], medians)
plt.show()

# Fragment: the lines below are the tail of the Teff-to-colour helper used by the scatter
# plots (it is called color_T elsewhere); its header and the guard for missing Teff values
# are not in this fragment, so the first two lines here are reconstructed assumptions.
def color_T(Teff):
    if Teff == 'nan' or Teff == '':  # assumed guard: black for missing temperatures
        return [0, 0, 0]
    Teff = float(Teff)
    x = (Teff - 2265) / 5375.0
    return [-x * x + 1, -4 * x * (x - 1), -x * (x - 2)]


def label_d(is_dwarf):
    if is_dwarf == '1.0':
        return 'Dwarf'
    elif is_dwarf == '0.0':
        return 'Giant'
    else:
        return 'Unknown'


data = dl.read_dat('evaluation.csv', ',')
f, plots = plt.subplots(3, 2, sharex=False, sharey=False)
dwarf_flags = dl.get_column(data, 1)
kepmags = dl.get_column(data, 3)
Teffs = dl.get_column(data, 2)
sigmas = []
for sigma in range(4, 9):
    sigmas.append(dl.get_column(data, sigma))
length = len(dl.get_column(data, 0))
for i in range(1, length):
    for seg in range(4, 9):
        is_dwarf = dwarf_flags[i]
        if is_dwarf == '1.0':
KEPLER_CADENCE = 29.42


def get_data(star_id, data):
    for row in data:
        if float(star_id) == float(row[0]):
            return row[1:4]
    return ['nan'] * 3


targets = datalib.get_column(K2.read_target_file('K2Campaign0targets.csv - K2Campaign0targets.csv'), 0)
del targets[0]
result = [['EPIC', 'is_dwarf', 'teff', 'kepmag', 'sigma_1', 'sigma_2', 'sigma_3', 'sigma_4', 'sigma_5']]
errors = []
quality = [['#EPIC', 'seg_1_ratio', 'seg_1_flag', 'seg_2_ratio', 'seg_2_flag', 'seg_3_ratio', 'seg_3_flag',
            'seg_4_ratio', 'seg_4_flag', 'seg_5_ratio', 'seg_5_flag']]
tess_data = datalib.read_dat('k2tess.csv', ',')
del tess_data[0]
faint_data = datalib.read_dat('k2faint.csv', ',')
del faint_data[0]
for i in range(0, len(targets)):
    target = targets[i]
    print target + ' : ' + str(int((100.0 * i) / len(targets))) + '%'
    info = K2.get_K2_info(int(target), poly_fit_deg=8)
    if info['error'] == 'None':
        star = []
        star_data_tess = get_data(target, tess_data)
        if star_data_tess != ['nan'] * 3:
            star = [target] + star_data_tess

def evaluate(folder_name):
    # Ignores NumPy polyfit warnings
    warnings.simplefilter('ignore', np.RankWarning)

    # Loads the K2 tess and K2 faint catalogs
    tess_data = dl.read_dat('k2tess.csv', ',')
    del tess_data[0]
    tess_data_ids = dl.get_column(tess_data, 0)
    tess_data_is_dwarf = dl.get_column(tess_data, 1)
    tess_data_teff = dl.get_column(tess_data, 2)
    tess_data_mag = dl.get_column(tess_data, 3)
    faint_data = dl.read_dat('k2faint.csv', ',')
    del faint_data[0]
    faint_data_ids = dl.get_column(faint_data, 0)
    faint_data_is_dwarf = dl.get_column(faint_data, 1)
    faint_data_teff = dl.get_column(faint_data, 2)
    faint_data_mag = dl.get_column(faint_data, 3)

    # 2d array that will be stored in a .csv file:
    # [[id, is_dwarf, teff, mag, sigma1, sigma2, ...], ...]
    output = []
    # Stars that could not be found in the tess and faint catalogs
    not_on_catalog = []

    dirlist = os.listdir(folder_name)
    for n in range(0, len(dirlist)):
        fname = dirlist[n]
        # 2d array containing the light curve: [[time, flux, segment], ...]
        data = dl.read_dat(folder_name + '/' + fname, ',')

        # Searches the K2 catalogs for the star's info
        star_id = ''
        is_dwarf = ''
        teff = 0
        mag = 0
        for i in range(0, len(tess_data_ids)):
            if str(tess_data_ids[i]).replace('.0', '') in fname:
                star_id = tess_data_ids[i]
                is_dwarf = tess_data_is_dwarf[i]
                teff = tess_data_teff[i]
                mag = tess_data_mag[i]
                # Removes the matched entry so it is not searched again
                del tess_data_ids[i], tess_data_mag[i], tess_data_teff[i], tess_data_is_dwarf[i]
                break
        if star_id == '':
            # If it was not found in the tess catalog, searches the faint catalog
            for i in range(0, len(faint_data_ids)):
                if str(faint_data_ids[i]).replace('.0', '') in fname:
                    star_id = faint_data_ids[i]
                    is_dwarf = faint_data_is_dwarf[i]
                    teff = faint_data_teff[i]
                    mag = faint_data_mag[i]
                    del faint_data_ids[i], faint_data_mag[i], faint_data_teff[i], faint_data_is_dwarf[i]
                    break

        print star_id + ' : ' + str((n * 100.0 / len(dirlist))) + '%'  # progress indicator

        # The code only runs if the star is on a catalog and the light curve has data
        if star_id != '' and len(data) > 1:
            #
            # Computes the standard deviation of each segment
            #
            time = []
            flux = []
            segment = []
            for row in data:
                time.append(float(row[0]))
                flux.append(float(row[1]))
                segment.append(row[2])

            # Deletes >3 sigma points and points with a nan segment label
            sigma = np.std(flux)
            avg = np.average(flux)
            good_time = []
            good_flux = []
            good_segment = []
            for i in range(0, len(time)):
                if abs(flux[i] - avg) <= 3 * sigma and str(segment[i]) != 'nan':
                    good_time.append(time[i])
                    good_flux.append(flux[i])
                    good_segment.append(segment[i])
            time, flux, segment = good_time, good_flux, good_segment

            # Splits into segments
            n_seg = 0  # number of segments
            for i in range(1, len(segment) + 1):
                if str(segment[-i]) != 'nan':
                    n_seg = int(float(segment[-i])) + 1
                    break
            time_seg = []
            flux_seg = []
            for i in range(0, n_seg):
                time_seg.append([])
                flux_seg.append([])
            for i in range(0, len(time)):
                if segment[i] != 'nan':
                    time_seg[int(float(segment[i]))].append(time[i])
                    flux_seg[int(float(segment[i]))].append(flux[i])

            # Subtracts a 4th-degree polynomial fit from each segment and computes its standard deviation
            sigmas = []
            for i in range(0, len(time_seg)):
                # The segment must be non-empty and contain more than 50% of the points
                # expected from its time span and the Kepler cadence
                if time_seg[i] != [] and len(time_seg[i]) > .5 * (time_seg[i][-1] - time_seg[i][0]) / KEPLER_CADENCE:
                    poly = np.poly1d(np.polyfit(time_seg[i], flux_seg[i], 4))
                    for j in range(0, len(time_seg[i])):
                        flux_seg[i][j] -= poly(time_seg[i][j])
                    sigmas.append(np.std(flux_seg[i]))
                else:
                    sigmas.append('nan')
            output.append([star_id, is_dwarf, teff, mag] + sigmas)
        else:
            not_on_catalog.append(fname)

    print not_on_catalog
    dl.write_dat(output, 'evaluations/evaluation_of_' + folder_name + '.csv', ',')

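# Usage sketch (assumed entry point; the folder name is a placeholder): evaluate a folder of
# light-curve .csv files and write the result to evaluations/evaluation_of_<folder>.csv.
if __name__ == '__main__':
    evaluate('lightcurves_c0')  # placeholder folder of <EPIC id>.csv light curves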