def methane():
    """
    Build the combined methane record (historical + 2018 + 2019) and export it as a text file.

    The historical workbook is kept for decimal years before 2018. The 2018 and 2019 in-situ
    workbooks are reduced to their 'Decimal Year' and 'Run median' columns, which are renamed
    'DecYear' and 'MR'. Rows of the combined record lacking a DecYear or MR are dropped, MR values
    with an absolute z-score above 5 are removed, and a 'datetime' column is added. The
    datetime/MR pairs are passed through noaaDateConv and written, space separated and without
    header or index, to 'methane2019updated.txt' in the current working directory.

    :return: the combined, trimmed dataframe containing at least 'DecYear', 'MR' and 'datetime'
             (any other columns of the historical workbook are carried along unchanged)
    :raises KeyError: if a new workbook lacks 'Decimal Year' or 'Run median'
    """
    # import original dataset and new datasets
    methanePrev = loadExcel(r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx')

    # Select and rename the wanted columns by name for each sheet individually, so the result
    # does not depend on both workbooks sharing the same column layout or column order.
    goodcol = ['Decimal Year', 'Run median']
    newnames = dict(zip(goodcol, ['DecYear', 'MR']))
    newSheets = [
        sheet[goodcol].dropna(how='any', axis=0).rename(columns=newnames)
        for sheet in (methane2018, methane2019)
    ]

    # keep only the pre-2018 part of the historical record; 2018 onwards comes from the new sheets
    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]
    methaneFinal = pd.concat([methanePrev] + newSheets)

    # A single NaN makes stats.zscore return NaN everywhere, which would silently disable the
    # outlier trimming below, so incomplete rows are removed first.
    methaneFinal = methaneFinal.dropna(subset=['DecYear', 'MR'])

    # trim extreme outliers
    z = np.abs(stats.zscore(methaneFinal['MR'].values))
    thresh = 5
    # .copy() so that the column assignment below acts on an independent frame, not on a slice
    methaneFinal = methaneFinal[~(z > thresh)].copy()

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates  # add to dataframe

    # noaa version: only the datetime and mixing ratio, converted by noaaDateConv
    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal['MR'].values
    noaaMethane = noaaDateConv(noaaMethane)
    noaaMethane.to_csv('methane2019updated.txt', header=None, index=None, sep=' ', mode='w+')

    return methaneFinal
def ethaneAce():
    """
    Export the ethane and acetylene records with DecYear above 2012 to plain text files.

    Loads the NMHC workbook, keeps the 'DecYear', 'ethane' and 'acetylene' columns, discards
    every row in which any of the three is missing, and writes 'ethaneOriginal.txt' and
    'aceOriginal.txt' to the current working directory. Each output line holds the decimal year
    and the compound value, both formatted with '%f' and separated by a single space.
    """
    workbook = loadExcel(r"C:\Users\ARL\Desktop\Python Code\Data\NMHC.xlsx")

    # restrict to the recent record, then to the three exported columns, then to complete rows
    recent = workbook[workbook['DecYear'] > 2012]
    table = recent[['DecYear', 'ethane', 'acetylene']].dropna(axis=0, how='any')

    # one output file per compound, written in the same order as before
    exports = (('ethaneOriginal.txt', 'ethane'), ('aceOriginal.txt', 'acetylene'))
    for fileName, column in exports:
        with open(fileName, 'w+') as out:
            for year, amount in zip(table['DecYear'], table[column]):
                out.write('%f ' % year)
                out.write('%f\n' % amount)
def ethaneAce():
    """
    Write the ethane and acetylene series with DecYear above 2012 to two text files.

    The NMHC workbook is read from the project's 'Data' directory. Only the 'DecYear', 'ethane'
    and 'acetylene' columns are used, and rows missing any of them are dropped. The results go
    to 'ethaneOriginal.txt' and 'aceOriginal.txt' in the current working directory, one
    "<decimal year> <value>" pair per line, both numbers formatted with '%f'.
    """
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    dataDir = os.path.join(homedir, 'Data')
    sheet = loadExcel(os.path.join(dataDir, 'NMHC.xlsx'))

    # rows after the 2012 cutoff, reduced to the needed columns and stripped of incomplete rows
    wanted = ['DecYear', 'ethane', 'acetylene']
    afterCutoff = sheet['DecYear'] > 2012
    subset = sheet[afterCutoff][wanted].dropna(axis=0, how='any')

    # output file -> source column; dict order keeps ethane first, acetylene second
    targets = {'ethaneOriginal.txt': 'ethane', 'aceOriginal.txt': 'acetylene'}
    for path, cpd in targets.items():
        with open(path, 'w+') as handle:
            handle.writelines(
                '%f ' % yr + '%f\n' % val
                for yr, val in zip(subset['DecYear'], subset[cpd])
            )
[note] This was one of the first real scripts I made for the project, it is a poor coding example, but it gives an example of how much I've progressed since first learning Python """ # Import Libraries import numpy as np import matplotlib.pyplot as plt from matplotlib.pyplot import figure from fileLoading import loadExcel import os # import data homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers' root = os.path.join(homedir, 'Data') nmhcData = loadExcel(os.path.join(root, 'NMHC.XLSX')) methaneData = loadExcel(os.path.join(root, 'Methane.XLSX')) # Plotting NMHC date = nmhcData.loc[:, 'DecYear'] # Variable describing the decimal Year numCompounds = np.linspace(0, 11, num=12) # There are 12 compounds we want to plots compounds = list(nmhcData.columns)[3:15] # List of the compound names numYears = np.linspace(2008, 2018, num=((2018 - 2008) + 1)) # number of years total for i in numCompounds: plt.figure(i) # Open a new fig for each compounds figure(num=None, figsize=(8, 6), dpi=160, facecolor='w', edgecolor='k') plt.xlabel('Day of Year', fontdict=None, labelpad=None, fontsize=14) # x labels all same
import pandas as pd from fileLoading import loadExcel, readCsv import os from dateConv import visitToDatetime, createDatetime import matplotlib.pyplot as plt import seaborn as sns import datetime as dt from pandas.plotting import register_matplotlib_converters import matplotlib.dates as mdates # For formatting date register_matplotlib_converters() # import the data root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data' datapath = os.path.join(root, 'TAWO_visit_log.xlsx') visits = loadExcel(datapath) concpath = os.path.join(root, 'ethane.txt') ethane = readCsv(concpath) # data cleaning dates = visits['Date'].values dates = dates[1:] badcols = ['Initials', 'Unnamed: 5', 'Date'] visits.drop(badcols, axis=1, inplace=True) visits.drop([0], axis=0, inplace=True) visits.dropna(axis=0, how='all', inplace=True) visits.reset_index(drop=True, inplace=True) ethane.columns = ['yr', 'mo', 'dy', 'hr', 'na', 'val'] # create proper datetimes
"""
Clean the 2012-2018 methane data and export it as a plain text file.

Rows containing any NaN are removed, rows whose 'MR' value lies above 2100 or below 1730 are
discarded as extreme outliers, and the remaining DecYear / MR pairs are written to
methaneARL_nofit.txt. Importing the new 2019 data from the spreadsheet is planned for later and is
not done here.

Created on May 29th, 2019
"""

# Import libraries
from fileLoading import loadExcel
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

root = r'C:\Users\ARL\Desktop\J_Summit\analyses\HarmonicFit\textFiles'
methaneData = loadExcel(r"C:\Users\ARL\Desktop\Python Code\Data\Methane.xlsx")

# A row with any missing value is discarded entirely
methaneData = methaneData.dropna(axis=0, how='any')

# Remove extreme outliers: gather the row labels on either side of the accepted range, then drop them
mixingRatio = methaneData['MR']
flag1 = methaneData.index[(mixingRatio > 2100).values]
flag2 = methaneData.index[(mixingRatio < 1730).values]
methaneData = methaneData.drop(flag1).drop(flag2)

# Report the column-wise extremes of what is left
for summary in (methaneData.max(), '-'*10, methaneData.min()):
    print(summary)

# One "<DecYear> <MR>" line per remaining row
outPath = root + r"\methaneARL_nofit.txt"
with open(outPath, 'w+') as f:
    for year, mr in zip(methaneData['DecYear'], methaneData['MR']):
        f.write("%f " % year)
        f.write("%f\n" % mr)
def nmhc():
    """
    Merge the historical NMHC record with the 2018 and 2019 ambient workbooks and write one text
    file per compound.

    The historical workbook contributes the samples dated before 2018-01-01; the 2018 and 2019
    workbooks are reshaped to one row per sample and appended. For each of the twelve compounds
    the datetime/value pairs without missing entries and dated after 2011-01-01 are passed
    through noaaDateConv and written, space separated and without header or index, to
    '<compound>.txt' in the current working directory. Elapsed times and progress messages are
    printed along the way.

    :return: None
    """
    start = time.time()

    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    # reduce each yearly workbook to its mixing ratio rows, one row per sample
    nmhc2018 = _tidyAmbientSheet(nmhc2018)
    nmhc2019 = _tidyAmbientSheet(nmhc2019)

    end = time.time()
    print('transposed in ', end - start)

    # create the datetime column for each yearly dataframe, then remove the old date columns
    for yr in [nmhc2018, nmhc2019]:
        # the first four characters of the first 'Unnamed: 0' entry give the year, which is
        # part of the name of the decimal-day column
        yearstr = str(yr['Unnamed: 0'].iloc[0])[:4]
        doycol = f'Decimal Day of Year {yearstr}'
        yr['datetime'] = [decToDatetime(x) for x in yr[doycol]]  # call decyear conv
        yr.drop(['Day', 'Hour', 'Minute', 'Unnamed: 0', doycol], axis=1, inplace=True)

    # same for the past data
    nmhcPrev['datetime'] = [decToDatetime(x) for x in nmhcPrev['DecYear']]
    nmhcPrev.drop(['DecYear', 'DOY', 'Ignore'], axis=1, inplace=True)

    end = time.time()
    print('datetimes created in ', end - start)

    # combine all datasets into one dataframe; 2018 onwards comes from the yearly workbooks.
    # pd.concat is used because DataFrame.append no longer exists in pandas 2.0 and later.
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1, 1)]  # remove 2018
    combined = pd.concat([nmhcPrev, nmhc2018, nmhc2019])

    end = time.time()
    print('datasets combined in ', end - start)

    # create textfiles for each NMHC
    compounds = [
        'ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
        'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene'
    ]
    # NOTE(review): the original comment said pre-2012 values are removed because of a gap, but
    # the cutoff in the code is 2011-01-01 -- confirm which one is intended.
    cutoff = dt.datetime(2011, 1, 1)

    for cpd in compounds:
        final = combined[['datetime', cpd]]  # datetimes and the specific cpd
        final = final.dropna(axis=0, how='any')  # drop the NANs
        final = final[final['datetime'] > cutoff]
        final = noaaDateConv(final)  # conv date formats
        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')
        print(f'{cpd} file written')

    print('All Files Done')


def _tidyAmbientSheet(sheet):
    """Return the mixing ratio rows of one yearly ambient workbook, transposed to one row per sample."""
    # Rows labelled 70-93 are the mixing ratio rows. Selecting them per sheet, instead of
    # dropping a label list built from another sheet, also works when workbooks differ in length.
    sheet = sheet[sheet.index.isin(list(range(70, 94)))]

    # drop unnecessary columns, then rows that are empty apart from their 'Unnamed: 0' entry
    sheet = sheet.drop(['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3'], axis=1)
    sheet = sheet.dropna(axis=0, how='all',
                         subset=[x for x in sheet.columns if x != 'Unnamed: 0'])

    # transpose, take the column names from the first row, drop first row and last row
    sheet = sheet.T.reset_index()
    sheet.columns = list(sheet.loc[0])
    return sheet.drop([0, len(sheet) - 1], axis=0)