def fireTrack(virrs=True):
    """
    fireTrack combines the acetylene/methane ratio data with a NASA fire archive, prints the
    average z scores of the combined frame, and shows the fire detections as a scatter plot
    drawn over an image of Greenland.

    :param virrs: Default True, reads fire_archive_V1_60132.csv, colors points by 'bright_ti4'
                  and titles the plot as VIIRS. Set to false to read fire_archive_M6_60131.csv,
                  color points by 'brightness' and title the plot as MODIS.
    :return: nothing, displays plot with plt.show()
    """

    # import alternate data
    root = r'C:\Users\ARL\Desktop\Summit\analyses\Data'
    ace = readCsv(root + '\\' + r'aceRatioNoaa.txt')

    # import fire data; everything that differs between the two sensors lives in this one lookup
    root = r'C:\Users\ARL\Desktop\FireData'
    if virrs:
        archive, brightness, sensor = r'\fire_archive_V1_60132.csv', 'bright_ti4', 'VIIRS'
    else:
        archive, brightness, sensor = r'\fire_archive_M6_60131.csv', 'brightness', 'MODIS'
    fire = pd.read_csv(root + archive)

    # data trimming, reassign headers, add datetime column
    ace.columns = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    # copy the filtered rows so the new columns below are added to an independent frame
    # instead of a slice of the original (avoids pandas' SettingWithCopyWarning)
    ace = ace[ace['value'] >= 0.00000001].copy()
    ace['datetime'] = decToDatetime(ace['decyear'].values)
    ace['normResid'] = ace['resid'].values / ace['value'].values

    # combine fire and other dataset to produce master dataframe for analysis
    master = fireCombo(fire, ace, VIRRS=virrs)

    # identify average z score
    avg_vals = np.average(master['value_z'].values)
    avg_norms = np.average(master['normed_z'].values)
    print(f'The average z score in values is {avg_vals}')
    print(f'The average z score in normalized residuals is {avg_norms}')

    # longitude / latitude extent that the background image is stretched over
    mybounds = {'x': (-73.2, -9.4), 'y': (57.8, 84.3)}

    # scatterplot mapping (the image is read from the fire data directory)
    img = mpimg.imread(root + r'\greenland.PNG')
    master.plot(kind='scatter', x='longitude', y='latitude',
                c=brightness, cmap=plt.get_cmap('magma_r'),
                colorbar=True, figsize=(10, 7))
    plt.imshow(img, extent=[*mybounds['x'], *mybounds['y']], alpha=0.5)
    plt.xlabel('Longitude', fontsize=14)
    plt.ylabel('Latitude', fontsize=14)
    plt.title(f'NASA {sensor} Fire Count Overlay on Greenland')

    # NOTE(review): no artist above is given an explicit label, so this legend may be empty
    # -- confirm it is wanted
    plt.legend()
    plt.show()
def ch4plot():
    """
    ch4plot reads the methane fit file, converts its decimal year column to datetimes, and
    draws a two row figure: the measured values with the fitted function on top, and the
    residuals with their fitted curve below. The figure is saved as methane.png.

    :return: nothing, the figure is written to disk with f.savefig()
    """

    header = ['yr', 'value', 'function', 'resid', 'residLine']  # dataframe headers
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    filepath = root + '\\' + 'methane.txt'
    data = readCsv(filepath)
    data.columns = header
    register_matplotlib_converters()

    # convert the dec year col to datetime
    data['datetime'] = decToDatetime(data['yr'])
    data.drop('yr', axis=1, inplace=True)

    # y bounds; the Series reducers skip NaN, so one missing sample cannot poison the limits
    values = data['value']
    padV = values.mean() / 100  # arbitrary vals look ok
    lowV = values.min() - padV
    highV = values.max() + padV

    resid = data['resid']
    # pad by the magnitude of the mean residual: with the signed mean, a negative value
    # would pull both limits inwards and clip the most extreme residuals
    padR = abs(resid.mean()) / 3
    lowR = resid.min() - padR
    highR = resid.max() + padR

    # x bounds, thirty days of margin on either side
    margin = dt.timedelta(days=30)
    low = data['datetime'].min() - margin
    high = data['datetime'].max() + margin

    # plotting
    sns.set()  # setup
    f, ax = plt.subplots(nrows=2, figsize=(12, 8))  # 2 row subplot
    sns.despine(f)
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=0.3, hspace=0.5)

    # background data values with fitted harmonic functions
    top = sns.scatterplot(x='datetime', y='value', data=data, ax=ax[0],
                          alpha=0.7, s=10, legend='brief', label='GC Data')
    sns.lineplot(x='datetime', y='function', data=data, ax=ax[0],
                 linewidth=2, label='Fitted Curve')
    top.set_title('GC Methane Data with Fitted Function')
    top.set_xlabel('Date')
    top.set_ylabel('Mixing Ratio [ppb]')
    top.set(xlim=(low, high))
    top.set(ylim=(lowV, highV))
    top.get_lines()[0].set_color('#00b386')  # first line on the axes is the fitted curve
    top.legend()

    # residual data
    bottom = sns.scatterplot(x='datetime', y='resid', data=data, ax=ax[1],
                             alpha=1, s=10, legend='brief', label='Residuals from Fit')
    sns.lineplot(x='datetime', y='residLine', data=data, ax=ax[1],
                 linewidth=2, label='Fitted Residual Curve')
    bottom.set_title('GC Residuals from Fitted Function')
    bottom.set_xlabel('Date')
    bottom.set_ylabel('Mixing Ratio [ppb]')
    bottom.get_lines()[0].set_color('#00b386')
    bottom.legend()
    bottom.set(xlim=(low, high))
    bottom.set(ylim=(lowR, highR))

    # save the plots
    direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + 'methane.png'
    f.savefig(direc, format='png')
import os from dateConv import visitToDatetime, createDatetime import matplotlib.pyplot as plt import seaborn as sns import datetime as dt from pandas.plotting import register_matplotlib_converters import matplotlib.dates as mdates # For formatting date register_matplotlib_converters() # import the data root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data' datapath = os.path.join(root, 'TAWO_visit_log.xlsx') visits = loadExcel(datapath) concpath = os.path.join(root, 'ethane.txt') ethane = readCsv(concpath) # data cleaning dates = visits['Date'].values dates = dates[1:] badcols = ['Initials', 'Unnamed: 5', 'Date'] visits.drop(badcols, axis=1, inplace=True) visits.drop([0], axis=0, inplace=True) visits.dropna(axis=0, how='all', inplace=True) visits.reset_index(drop=True, inplace=True) ethane.columns = ['yr', 'mo', 'dy', 'hr', 'na', 'val'] # create proper datetimes visits['start'], visits['end'] = visitToDatetime( dates, visits['Arrival time (Z)'].values,
def plotratios(hours, ethane=True, all=True, summer=True, viirs=True):
    """
    plotratios is a function that imports either the acetylene/methane ratio or the ethane/methane ratio data
    and plots it, various conditions can be set.

    :param hours: Number of back trajectory hours ran with Hysplit, used for plot titles
    :param ethane: Default True. Set to false for acetylene data.
    :param all: Default True, uses all data. Set to false to cut z scores below 3.
    :param summer: Default True, cuts winter data. Set to false to use only winter data and cut summer data
    :param viirs: Default True, uses viirs fire data. Set to false to use MODIS C6 data
    :return: nothing, displays plot with plt.show()
    :raises ValueError: if ethane=True and all=False, since no trajectory directory is defined for that case
    """

    # The ethane outlier case never had a trajectory directory assigned, so the function used to
    # load and filter everything and then fail with an UnboundLocalError at the trajPlot call.
    # Reject the combination up front with a message that says what is missing.
    if ethane and not all:
        raise ValueError('No trajectory directory is configured for ethane outliers '
                         '(ethane=True, all=False); add one before using this combination.')

    # Create titles and set data path depending on options
    dataroot = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'  # data directory
    trajroot = r'C:\Users\ARL\Desktop\Jashan\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\messeduptime_notUTC'

    if ethane:
        root = os.path.join(trajroot, 'ethane_methane_all')
        title = f'{hours}h Back Trajectories of Ethane/Methane Ratio, 2012-2019'
        sheet = readCsv(dataroot + r'\ethaneRatioNoaa.txt')
    elif all:
        root = r'C:\Users\ARL\Desktop\Jashan\Jashan ' \
               r'PySplit\pysplitprocessor-master\pysplitprocessor\aceTraj'
        title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio, 2012-2018'
        sheet = readCsv(dataroot + r'\aceRatioNoaa.txt')
    else:
        # NOTE(review): this directory sits directly under Desktop while the other trajectory
        # directories sit under Desktop\Jashan -- confirm the path
        root = r'C:\Users\ARL\Desktop\Jashan ' \
               r'PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj_highz'
        title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2019'
        sheet = readCsv(dataroot + r'\aceRatioNoaa.txt')

    sheet.columns = ['decyear', 'value', 'function', 'resid', 'residsmooth']  # assign column names

    # remove zero values; copy so the columns added below go onto an independent frame
    sheet = sheet[sheet['value'] >= 0.000001].copy()
    sheet['datetime'] = decToDatetime(sheet['decyear'].values)  # create datetimes from decyear
    # shift every timestamp by three hours (original author's note: convert tz to UTC)
    sheet['datetime'] = sheet['datetime'] + pd.Timedelta('3 hours')

    # day of year for every sample
    sheet['julian'] = [d.timetuple().tm_yday for d in sheet['datetime'].tolist()]

    cutoffs = (120, 305)  # first and last day of year kept as summer, inclusive
    inSummer = (sheet['julian'] >= cutoffs[0]) & (sheet['julian'] <= cutoffs[1])
    if summer:
        keep = inSummer
        print('-- Winter Data Removed')
    else:
        keep = ~inSummer
        print('-- Summer Data Removed')

    # keep the selected season and drop unused columns in one step (returns a new frame)
    sheet = sheet[keep].drop(['decyear', 'function', 'residsmooth'], axis=1)

    # original author's note: remove slow data or data above 342, below 72 degrees at Summit camp
    # due to possible pollution (the filtering itself is implemented in metRemove)
    sheetClean = metRemove(sheet, 1, dropMet=True)

    z = np.abs(stats.zscore(sheetClean['resid'].values))  # absolute z score of each residual
    sheetClean['zscores'] = z  # assign as column

    thresh = 0 if all else 3  # z score threshold
    sheetZ = sheetClean[z > thresh].reset_index(drop=True)  # remove non outliers

    trajPlot(root, title=title, zscores=sheetZ, viirs=viirs, summer=summer)
""" import numpy as np from trajectoryPlotting import trajPlot from fileLoading import readCsv from dateConv import decToDatetime from metRemove import metRemove from scipy import stats hours = 72 title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2018' root = r'C:\Users\ARL\Desktop\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj' dataroot = r'C:\Users\ARL\Desktop\Summit\analyses\Data' # data directory ace = readCsv(dataroot + r'\aceRatioNoaa.txt') # data read in acetylene header = ['decyear', 'value', 'function', 'resid', 'residsmooth'] # assign column names ace.columns = header ace = ace[ace['value'] >= 0.00000001] ace['datetime'] = decToDatetime( ace['decyear'].values) # create datetimes from decyear dates = ace['datetime'].tolist() # put datetimes in a list julian = [] # preallocate julian day list for d in dates: # loop over each date tt = d.timetuple() # create a timetuple jul = tt.tm_yday # identify julian day julian.append(jul) # append to list
def ratioPlot():
    """
    ratioPlot reads the ethane/methane and acetylene/methane ratio files, saves a three panel
    figure for each compound (values with fitted function, residuals with smoothed fit, residuals
    by day of year), then shows a joint regression plot of the ethane residuals against the
    acetylene residuals.

    :return: nothing, figures are saved with f.savefig() and the joint plot is displayed with plt.show()
    """

    register_matplotlib_converters()

    # import data
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    ethane.columns = header
    ace.columns = header

    # copy the filtered rows so the columns added below land on independent frames
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    # carry the display name next to each frame instead of attaching it as a DataFrame attribute
    compounds = [('Ethane', ethane), ('Acetylene', ace)]

    # NOTE(review): the per-compound figure is built inside this loop because its titles and file
    # name use the compound name -- confirm against the original layout
    for name, sheet in compounds:
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        # plotting
        sns.set()
        f, axes = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                            wspace=None, hspace=0.8)

        # shared x limits for the two time series panels, ten days of margin
        margin = dt.timedelta(days=10)
        timeLimits = (min(sheet['datetime']) - margin, max(sheet['datetime']) + margin)

        # shared y limits for the two residual panels, eight standard deviations around the mean
        center = np.mean(sheet['resid'])
        spread = np.std(sheet['resid']) * 8
        residLimits = (center - spread, center + spread)

        # ratio values with the fitted function
        valuePad = np.mean(sheet['value'] / 3)
        top = sns.scatterplot(x='datetime', y='value', data=sheet, alpha=0.7,
                              label='Original Data', ax=axes[0])
        sns.lineplot(x='datetime', y='function', data=sheet, linewidth=2,
                     label='Fitted Function', ax=axes[0])
        top.set_title(name + ' / Methane Ratio', size=26)
        top.set_xlabel('Datetime', fontsize=22)
        top.set_ylabel('Ratio Value', fontsize=18)
        top.set(xlim=timeLimits)
        top.set(ylim=(min(sheet['value']) - valuePad, max(sheet['value']) + valuePad))
        top.get_lines()[0].set_color('purple')
        top.legend(prop={'size': 14})

        # residuals with the smoothed fit
        middle = sns.scatterplot(x='datetime', y='resid', data=sheet, alpha=0.7,
                                 label='Residuals', ax=axes[1])
        sns.lineplot(x='datetime', y='residsmooth', data=sheet, linewidth=2,
                     label='Smoothed Residual Fit', ax=axes[1])
        middle.get_lines()[0].set_color('purple')
        middle.set_title('Residuals in ' + name, size=26)
        middle.set_xlabel('Datetime', fontsize=22)
        middle.set_ylabel('Residual / Value', fontsize=18)
        middle.set(xlim=timeLimits)
        middle.set(ylim=residLimits)
        middle.legend(prop={'size': 14})

        # day of year plot residuals
        sheet['DOY'] = [stamp.timetuple().tm_yday for stamp in sheet['datetime']]
        bottom = sns.scatterplot(x='DOY', y='resid', data=sheet, alpha=0.7,
                                 label='Residuals', ax=axes[2])
        bottom.set_title('Residuals by Julian Day', size=26)
        bottom.set_xlabel('Day of Year', fontsize=22)
        bottom.set_ylabel('Residual / Value', fontsize=18)
        bottom.set(xlim=(min(sheet['DOY']), max(sheet['DOY'])))
        bottom.set(ylim=residLimits)
        bottom.legend(prop={'size': 14})

        direc = os.path.join(homedir, 'Figures') + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

        # NOTE(review): these run after savefig, so they only affect the figure as later shown,
        # not the saved png -- confirm that ordering is intended
        for panel in axes:
            panel.tick_params(labelsize=18)
        matplotlib.rc("legend", fontsize=26)

    # plotting separate heatmap
    sns.set(style="white", font_scale=1.5)
    sns.despine()

    # pair each ethane sample with the acetylene sample nearest in time; columns present in
    # both frames come back with _x (ethane) and _y (acetylene) suffixes
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo.dropna(axis=0, inplace=True, how='any')
    # NOTE(review): hard-coded positional row range removed by the original author -- reason not
    # visible here, confirm it still matches the data file
    combo.drop(combo.index[5586:5776], axis=0, inplace=True)

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword: current seaborn releases no longer accept them positionally
    g = sns.jointplot(x=combo['resid_x'], y=combo['resid_y'], kind='reg', color='#e65c00',
                      line_kws={
                          'label': 'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane/Methane Ratio', 'Acetylene/Methane Ratio', fontsize=20)
    plt.tick_params(axis='both', labelsize=18)
    g.fig.suptitle('Correlation between Ethane and Acetylene Ratio Residuals', fontsize=28)
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()

    plt.show()
def ratioPlot():
    """
    ratioPlot reads the ethane/methane and acetylene/methane ratio files, divides the residual and
    smoothed residual columns by the value column, saves a three panel figure for each compound
    (values with fitted function, normalized residuals with smoothed fit, normalized residuals by
    day of year), then shows a joint regression plot of the ethane normalized residuals against
    the acetylene normalized residuals.

    :return: nothing, figures are saved with f.savefig() and the joint plot is displayed with plt.show()
    """

    register_matplotlib_converters()

    # import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    ethane.columns = header
    ace.columns = header

    # copy the filtered rows so the columns changed below belong to independent frames
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    # carry the display name next to each frame instead of attaching it as a DataFrame attribute
    compounds = [('Ethane', ethane), ('Acetylene', ace)]

    # NOTE(review): the per-compound figure is built inside this loop because its titles and file
    # name use the compound name -- confirm against the original layout
    for name, sheet in compounds:
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        # normalize both residual columns by the measured value, overwriting them in place
        value = sheet['value'].values
        sheet['resid'] = sheet['resid'].values / value
        sheet['residsmooth'] = sheet['residsmooth'].values / value

        # plotting
        sns.set()
        f, axes = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                            wspace=None, hspace=0.8)

        # shared x limits for the two time series panels, ten days of margin
        margin = dt.timedelta(days=10)
        timeLimits = (min(sheet['datetime']) - margin, max(sheet['datetime']) + margin)

        # shared y limits for the two residual panels, eight standard deviations around the mean
        center = np.mean(sheet['resid'])
        spread = np.std(sheet['resid']) * 8
        residLimits = (center - spread, center + spread)

        # ratio values with the fitted function
        valuePad = np.mean(sheet['value'] / 3)
        top = sns.scatterplot(x='datetime', y='value', data=sheet, alpha=0.7,
                              label='Original Data', ax=axes[0])
        sns.lineplot(x='datetime', y='function', data=sheet, linewidth=2,
                     label='Fitted Function', ax=axes[0])
        top.set_title(name + ' / Methane Ratio')
        top.set_xlabel('Datetime')
        top.set_ylabel('Mixing Ratio [ppb]')
        top.set(xlim=timeLimits)
        top.set(ylim=(min(sheet['value']) - valuePad, max(sheet['value']) + valuePad))
        top.get_lines()[0].set_color('purple')
        top.legend()

        # normalized residuals with the smoothed fit
        middle = sns.scatterplot(x='datetime', y='resid', data=sheet, alpha=0.7,
                                 label='Normalized Residuals', ax=axes[1])
        sns.lineplot(x='datetime', y='residsmooth', data=sheet, linewidth=2,
                     label='Smoothed Residual Fit', ax=axes[1])
        middle.get_lines()[0].set_color('purple')
        middle.set_title('Normalized Residuals in ' + name)
        middle.set_xlabel('Datetime')
        middle.set_ylabel('Mixing Ratio [ppb]')
        middle.set(xlim=timeLimits)
        middle.set(ylim=residLimits)
        middle.legend()

        # day of year plot residuals
        sheet['DOY'] = [stamp.timetuple().tm_yday for stamp in sheet['datetime']]
        bottom = sns.scatterplot(x='DOY', y='resid', data=sheet, alpha=0.7,
                                 label='Normalized Residuals', ax=axes[2])
        bottom.set_title('Normalized Residuals by Julian Day')
        bottom.set_xlabel('Day of Year')
        bottom.set_ylabel('Mixing Ratio [ppb]')
        bottom.set(xlim=(min(sheet['DOY']), max(sheet['DOY'])))
        bottom.set(ylim=residLimits)
        bottom.legend()

        direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

    # plotting separate heatmap
    sns.set(style="white")
    sns.despine()

    # pair each ethane sample with the acetylene sample nearest in time; columns present in
    # both frames come back with _x (ethane) and _y (acetylene) suffixes
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo = combo[combo['resid_y'] > -5]  # keep rows whose acetylene normalized residual exceeds -5

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword: current seaborn releases no longer accept them positionally
    g = sns.jointplot(x=combo['resid_x'], y=combo['resid_y'], kind='reg', color='#e65c00',
                      line_kws={
                          'label': 'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane MR [ppb]', 'Acetylene MR [ppb]', fontsize=12)
    g.fig.suptitle(
        'Correlation between Ethane and Acetylene Normalized Residuals')
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()

    plt.show()
import seaborn as sns import datetime as dt # main script compounds = ['ethane', 'ethene', 'propane', 'propene', 'i_pentane', 'acetylene', 'n_pentane', 'i_butane', 'n_butane', 'hexane', 'benzene', 'toulene'] # compound list root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data' # data directory header = ['yr', 'value', 'function', 'resid'] # dataframe headers register_matplotlib_converters() for cpd in compounds: filename = root + '\\' + cpd + 'FIT.txt' # file ext data = readCsv(filename) data.columns = header # reset column names data = data[data['value'] > 0.0] normResid = data['resid'].values / data['value'].values data.drop(['resid'], axis=1, inplace=True) data['resid'] = normResid dates = decToDatetime(data['yr'].values) # call conv function data['datetime'] = dates # assign to DF data.drop('yr', axis=1, inplace=True) # trim a few extreme outliers values = data['value'].values # get the value col z = np.abs(stats.zscore(values)) # get the z score