Python readCsv示例，fileLoading.readCsv Python示例

示例#1

0

显示文件

def fireTrack():
    """
    Overlay satellite fire detections, merged with the 'aceRatioNoaa.txt' ratio data, on a Greenland image.

    Reads the ratio file and one of two fire archive CSVs, trims and annotates
    the ratio data, combines both with fireCombo, prints the average of the
    'value_z' and 'normed_z' columns of the combined frame, and shows a
    scatter plot of the detections drawn over 'greenland.PNG'.

    :return: nothing, displays plot with plt.show()
    """

    # import alternate data
    root = r'C:\Users\ARL\Desktop\Summit\analyses\Data'
    ace = readCsv(root + '\\' + r'aceRatioNoaa.txt')

    # import fire data; the flag selects the V1 archive (True) or the M6 archive (False)
    virrs = True
    root = r'C:\Users\ARL\Desktop\FireData'
    if virrs:
        fire = pd.read_csv(root + r'\fire_archive_V1_60132.csv')
    else:
        fire = pd.read_csv(root + r'\fire_archive_M6_60131.csv')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    ace.columns = header

    # Take an explicit copy of the filtered rows: the column assignments below
    # would otherwise be made on a slice of the unfiltered frame, which pandas
    # flags with SettingWithCopyWarning.
    ace = ace[ace['value'] >= 0.00000001].copy()

    ace['datetime'] = decToDatetime(ace['decyear'].values)
    ace['normResid'] = ace['resid'].values / ace['value'].values

    # combine fire and other dataset to produce master dataframe for analysis
    master = fireCombo(fire, ace, VIRRS=virrs)

    # identify average z score
    avg_vals = np.average(master['value_z'].values)
    avg_norms = np.average(master['normed_z'].values)

    print(f'The average z score in values is {avg_vals}')
    print(f'The average z score in normalized residuals is {avg_norms}')

    # longitude / latitude extent used to position the background image
    mybounds = {'x': (-73.2, -9.4), 'y': (57.8, 84.3)}

    # scatterplot mapping
    img = mpimg.imread(root + r'\greenland.PNG')

    # the two archives are coloured by differently named columns; everything
    # else about the scatter plot is identical
    colour_column = 'bright_ti4' if virrs else 'brightness'
    master.plot(kind='scatter',
                x='longitude',
                y='latitude',
                c=colour_column,
                cmap=plt.get_cmap('magma_r'),
                colorbar=True,
                figsize=(10, 7))

    # draw the image on the same (current) axes, stretched to the bounds above
    plt.imshow(img,
               extent=[
                   mybounds['x'][0], mybounds['x'][1], mybounds['y'][0],
                   mybounds['y'][1]
               ],
               alpha=0.5)

    plt.xlabel('Longitude', fontsize=14)
    plt.ylabel('Latitude', fontsize=14)
    if virrs:
        plt.title('NASA VIIRS Fire Count Overlay on Greenland')
    else:
        plt.title('NASA MODIS Fire Count Overlay on Greenland')
    plt.legend()
    plt.show()

示例#2

0

显示文件

文件： gcMethane.py 项目： brendano257/Summit

def ch4plot():
    """
    Plot the methane data with its fitted function and its residuals, then save the figure.

    Reads 'methane.txt', swaps the decimal-year column for a datetime column,
    draws two stacked panels (values with the 'function' column on top,
    residuals with the 'residLine' column below) and writes 'methane.png'.

    :return: nothing, the figure is saved to disk
    """
    register_matplotlib_converters()

    # load the file and name its columns
    source_dir = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    data = readCsv(source_dir + '\\' + 'methane.txt')
    data.columns = ['yr', 'value', 'function', 'resid', 'residLine']

    # replace the decimal year column with a datetime column
    data['datetime'] = decToDatetime(data['yr'])
    data.drop('yr', axis=1, inplace=True)

    # y limits for the value panel: pad by 1% of the mean (arbitrary, looks ok)
    value_pad = np.mean(data['value']) / 100
    value_limits = (min(data['value']) - value_pad,
                    max(data['value']) + value_pad)

    # y limits for the residual panel: pad by a third of the mean residual
    resid_pad = np.mean(data['resid'].values) / 3
    resid_limits = (min(data['resid']) - resid_pad,
                    max(data['resid']) + resid_pad)

    # x limits: 30 days of margin on either side of the record
    margin = dt.timedelta(days=30)
    date_limits = (min(data['datetime']) - margin,
                   max(data['datetime']) + margin)

    # figure setup: two stacked rows
    sns.set()
    fig, panels = plt.subplots(nrows=2, figsize=(12, 8))
    sns.despine(fig)
    plt.subplots_adjust(wspace=0.3, hspace=0.5)
    upper, lower = panels

    # upper panel: data values with the fitted function
    sns.scatterplot(x='datetime', y='value', data=data, ax=upper,
                    alpha=0.7, s=10, legend='brief', label='GC Data')
    sns.lineplot(x='datetime', y='function', data=data, ax=upper,
                 linewidth=2, label='Fitted Curve')
    upper.set_title('GC Methane Data with Fitted Function')
    upper.set_xlabel('Date')
    upper.set_ylabel('Mixing Ratio [ppb]')
    upper.set(xlim=date_limits)
    upper.set(ylim=value_limits)
    # recolour the line before building the legend so the legend picks it up
    upper.get_lines()[0].set_color('#00b386')
    upper.legend()

    # lower panel: residuals with their fitted line
    sns.scatterplot(x='datetime', y='resid', data=data, ax=lower,
                    alpha=1, s=10, legend='brief', label='Residuals from Fit')
    sns.lineplot(x='datetime', y='residLine', data=data, ax=lower,
                 linewidth=2, label='Fitted Residual Curve')
    lower.set_title('GC Residuals from Fitted Function')
    lower.set_xlabel('Date')
    lower.set_ylabel('Mixing Ratio [ppb]')
    lower.get_lines()[0].set_color('#00b386')
    lower.legend()
    lower.set(xlim=date_limits)
    lower.set(ylim=resid_limits)

    # write the figure to disk
    target = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + 'methane.png'
    fig.savefig(target, format='png')

示例#3

0

显示文件

import os
from dateConv import visitToDatetime, createDatetime
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from pandas.plotting import register_matplotlib_converters
import matplotlib.dates as mdates  # For formatting date

register_matplotlib_converters()

# locate and load the visit log and the ethane record
root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
datapath = os.path.join(root, 'TAWO_visit_log.xlsx')
concpath = os.path.join(root, 'ethane.txt')
visits = loadExcel(datapath)
ethane = readCsv(concpath)

# keep the dates (minus the first entry) before the 'Date' column is dropped
dates = visits['Date'].values[1:]

# drop the unused columns and the first row, discard fully empty rows, then renumber
badcols = ['Initials', 'Unnamed: 5', 'Date']
visits.drop(columns=badcols, inplace=True)
visits.drop(index=[0], inplace=True)
visits.dropna(axis=0, how='all', inplace=True)
visits.reset_index(drop=True, inplace=True)

# name the ethane columns
ethane.columns = ['yr', 'mo', 'dy', 'hr', 'na', 'val']

# create proper datetimes
visits['start'], visits['end'] = visitToDatetime(
    dates, visits['Arrival time (Z)'].values,

示例#4

0

显示文件

文件： ratioPlotter.py 项目： JashanChopra/SummitWildfireTracers

def plotratios(hours, ethane=True, all=True, summer=True, viirs=True):
    """
    plotratios is a function that imports either the acetylene/methane ratio or the ethane/methane ratio data,
    filters it, and hands it to trajPlot for plotting; various conditions can be set.

    :param hours: Number of back trajectory hours ran with Hysplit, used for plot titles
    :param ethane: Default True. Set to false for acetylene data.
    :param all: Default True, uses all data. Set to false to keep only points whose residual z score is above 3.
    :param summer: Default True, keeps days of year 120 through 305. Set to false to keep only the days outside
                   that window.
    :param viirs: Default True, uses viirs fire data. Set to false to use MODIS C6 data. Passed through to trajPlot.

    :raises ValueError: if ethane is True and all is False; no trajectory directory is defined for that combination
    :return: nothing, plotting is delegated to trajPlot
    """

    # Create titles and set data path depending on options
    dataroot = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'                     # data directory
    trajroot = r'C:\Users\ARL\Desktop\Jashan\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\messeduptime_notUTC'
    if ethane:
        datafile = r'\ethaneRatioNoaa.txt'
        if all:
            root = os.path.join(trajroot, 'ethane_methane_all')
            title = f'{hours}h Back Trajectories of Ethane/Methane Ratio, 2012-2019'
        else:
            # This branch previously set a title but never a trajectory directory, so the function ran all of
            # its processing and then died with UnboundLocalError at the trajPlot call. Fail immediately with
            # an explicit message instead of guessing a directory.
            raise ValueError('plotratios: no trajectory directory is defined for ethane=True with all=False')
    else:
        datafile = r'\aceRatioNoaa.txt'
        if all:
            root = r'C:\Users\ARL\Desktop\Jashan\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\aceTraj'
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio, 2012-2018'
        else:
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2019'
            # NOTE(review): this path starts at 'Desktop\Jashan PySplit' while the others start at
            # 'Desktop\Jashan\Jashan PySplit' -- confirm which is intended
            root = r'C:\Users\ARL\Desktop\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj_highz'

    sheet = readCsv(dataroot + datafile)                                        # file depends only on `ethane`

    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']           # create header
    sheet.columns = header                                                      # assign column names
    # remove zero values; copy so the column assignments below do not target a slice of the unfiltered frame
    sheet = sheet[sheet['value'] >= 0.000001].copy()

    sheet['datetime'] = decToDatetime(sheet['decyear'].values)                  # create datetimes from decyear
    sheet['datetime'] = sheet['datetime'] + pd.Timedelta('3 hours')             # shift by 3 hours (tz to UTC)

    # day of year for every datetime
    sheet['julian'] = [d.timetuple().tm_yday for d in sheet['datetime'].tolist()]

    cutoffs = (120, 305)                                                        # inclusive day-of-year window
    in_window = np.logical_and(sheet['julian'] >= cutoffs[0],
                               sheet['julian'] <= cutoffs[1])
    if summer:
        keep = in_window                                                        # find just summer values
        print('-- Winter Data Removed')
    else:
        keep = ~in_window                                                       # find just winter values
        print('-- Summer Data Removed')

    # select the rows and drop unused columns in one step, yielding a new frame rather than mutating a slice
    dropcols = ['decyear', 'function', 'residsmooth']
    sheet = sheet[keep].drop(dropcols, axis=1)

    # per the original comment: remove slow data or data above 342, below 72 degrees at Summit camp due to
    # possible pollution (the filtering itself lives in metRemove)
    sheetClean = metRemove(sheet, 1, dropMet=True)

    residuals = sheetClean['resid'].values                                      # numpy array of resid
    z = np.abs(stats.zscore(residuals))                                         # absolute z scores
    sheetClean['zscores'] = z                                                   # assign as column

    thresh = 0 if all else 3                                                    # z score threshold
    sheetZ = sheetClean[z > thresh].reset_index(drop=True)                      # keep rows above the threshold

    trajPlot(root, title=title, zscores=sheetZ, viirs=viirs, summer=summer)

示例#5

0

显示文件

"""
import numpy as np

from trajectoryPlotting import trajPlot
from fileLoading import readCsv
from dateConv import decToDatetime
from metRemove import metRemove
from scipy import stats

hours = 72
title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2018'
root = r'C:\Users\ARL\Desktop\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj'

dataroot = r'C:\Users\ARL\Desktop\Summit\analyses\Data'  # data directory
ace = readCsv(dataroot + r'\aceRatioNoaa.txt')  # data read in acetylene

header = ['decyear', 'value', 'function', 'resid',
          'residsmooth']  # assign column names
ace.columns = header
# copy the filtered rows so the new column below is added to an independent
# frame rather than to a slice of the unfiltered one (SettingWithCopyWarning)
ace = ace[ace['value'] >= 0.00000001].copy()

ace['datetime'] = decToDatetime(
    ace['decyear'].values)  # create datetimes from decyear

dates = ace['datetime'].tolist()  # put datetimes in a list
julian = [d.timetuple().tm_yday for d in dates]  # day of year for each date

示例#6

0

显示文件

文件： ethaneAceRatios.py 项目： JashanChopra/SummitWildfireTracers

def ratioPlot():
    """
    Plot the ethane/methane and acetylene/methane ratio data and correlate their residuals.

    For each of the two ratio files a three-panel figure is drawn (values with
    the fitted function, residuals over time, residuals by day of year) and
    saved as '<name>Ratio.png' in the 'Figures' directory. The two datasets
    are then merged on nearest datetime, a linear regression is fitted to
    their residuals, and a regression joint plot is displayed.

    :return: nothing, saves two figures and displays the plots with plt.show()
    """
    register_matplotlib_converters()

    # import data
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']

    for sheet in [ethane, ace]:
        sheet.columns = header

    # Copy the filtered rows: columns are added to these frames in the loop
    # below, and doing that on a slice of the unfiltered frame triggers
    # pandas' SettingWithCopyWarning.
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    # Pair each frame with its display name instead of storing the name as an
    # ad-hoc attribute on the DataFrame. The frames are modified in place, so
    # `ethane` and `ace` carry the new columns after the loop.
    for name, sheet in (('Ethane', ethane), ('Acetylene', ace)):
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        # axis limits shared by several panels
        date_limits = ((min(sheet['datetime']) - dt.timedelta(days=10)),
                       (max(sheet['datetime']) + dt.timedelta(days=10)))
        resid_limits = (np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                        np.mean(sheet['resid']) + np.std(sheet['resid']) * 8)

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None,
                            bottom=None,
                            right=None,
                            top=None,
                            wspace=None,
                            hspace=0.8)

        # panel 1: ratio values with the fitted function
        ax1 = sns.scatterplot(x='datetime',
                              y='value',
                              data=sheet,
                              alpha=0.7,
                              label='Original Data',
                              ax=ax[0])
        ax2 = sns.lineplot(x='datetime',
                           y='function',
                           data=sheet,
                           linewidth=2,
                           label='Fitted Function',
                           ax=ax[0])
        ax1.set_title(name + ' / Methane Ratio', size=26)
        ax1.set_xlabel('Datetime', fontsize=22)
        ax1.set_ylabel('Ratio Value', fontsize=18)
        ax1.set(xlim=date_limits)
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend(prop={'size': 14})

        # panel 2: residuals over time with the smoothed residual fit
        ax3 = sns.scatterplot(x='datetime',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Residuals',
                              ax=ax[1])
        ax4 = sns.lineplot(x='datetime',
                           y='residsmooth',
                           data=sheet,
                           linewidth=2,
                           label='Smoothed Residual Fit',
                           ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Residuals in ' + name, size=26)
        ax3.set_xlabel('Datetime', fontsize=22)
        ax3.set_ylabel('Residual / Value', fontsize=18)
        ax3.set(xlim=date_limits)
        ax3.set(ylim=resid_limits)
        ax3.legend(prop={'size': 14})

        # panel 3: residuals by day of year
        sheet['DOY'] = [x.timetuple().tm_yday for x in sheet['datetime']]

        ax5 = sns.scatterplot(x='DOY',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Residuals',
                              ax=ax[2])
        ax5.set_title('Residuals by Julian Day', size=26)
        ax5.set_xlabel('Day of Year', fontsize=22)
        ax5.set_ylabel('Residual / Value', fontsize=18)
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=resid_limits)
        ax5.legend(prop={'size': 14})

        # Enlarge tick labels BEFORE saving; this used to run after savefig,
        # so the PNG on disk never received the setting. A separate loop name
        # is used so the subplot array `ax` is not overwritten.
        for panel in [ax1, ax2, ax3, ax4, ax5]:
            panel.tick_params(labelsize=18)

        direc = os.path.join(homedir,
                             'Figures') + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

    matplotlib.rc("legend", fontsize=26)

    # plotting separate joint plot of the two residual series
    sns.set(style="white", font_scale=1.5)
    sns.despine()
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo.dropna(axis=0, inplace=True, how='any')
    # NOTE(review): hard-coded positional row range; presumably specific to
    # the dataset this was written against -- confirm before reusing
    combo.drop(combo.index[5586:5776], axis=0, inplace=True)

    # columns shared by both frames get the '_x' (ethane) / '_y' (acetylene) suffixes
    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally, and the keyword form also works on older releases
    g = sns.jointplot(x=combo['resid_x'],
                      y=combo['resid_y'],
                      kind='reg',
                      color='#e65c00',
                      line_kws={
                          'label':
                          'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane/Methane Ratio',
                      'Acetylene/Methane Ratio',
                      fontsize=20)
    plt.tick_params(axis='both', labelsize=18)
    g.fig.suptitle('Correlation between Ethane and Acetylene Ratio Residuals',
                   fontsize=28)
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()

示例#7

0

显示文件

文件： ethaneAceRatios.py 项目： brendano257/Summit

def ratioPlot():
    """
    Plot the ethane and acetylene ratio data with value-normalized residuals and correlate the residuals.

    For each of the two ratio files the 'resid' and 'residsmooth' columns are
    divided by the 'value' column, a three-panel figure is drawn (values with
    the fitted function, normalized residuals over time, normalized residuals
    by day of year) and saved as '<name>Ratio.png'. The two datasets are then
    merged on nearest datetime, a linear regression is fitted to their
    normalized residuals, and a regression joint plot is displayed.

    :return: nothing, saves two figures and displays the plots with plt.show()
    """
    register_matplotlib_converters()

    # import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']

    for sheet in [ethane, ace]:
        sheet.columns = header

    # Copy the filtered rows: the loop below overwrites and adds columns, and
    # doing that on a slice of the unfiltered frame triggers pandas'
    # SettingWithCopyWarning.
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    # Pair each frame with its display name instead of storing the name as an
    # ad-hoc attribute on the DataFrame. The frames are modified in place, so
    # `ethane` and `ace` carry the changes after the loop.
    for name, sheet in (('Ethane', ethane), ('Acetylene', ace)):

        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        # normalize both residual columns by the value column
        sheet['resid'] = sheet['resid'].values / sheet['value'].values
        sheet['residsmooth'] = sheet['residsmooth'].values / sheet['value'].values

        # axis limits shared by several panels
        date_limits = ((min(sheet['datetime']) - dt.timedelta(days=10)),
                       (max(sheet['datetime']) + dt.timedelta(days=10)))
        resid_limits = (np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                        np.mean(sheet['resid']) + np.std(sheet['resid']) * 8)

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None,
                            bottom=None,
                            right=None,
                            top=None,
                            wspace=None,
                            hspace=0.8)

        # panel 1: ratio values with the fitted function
        ax1 = sns.scatterplot(x='datetime',
                              y='value',
                              data=sheet,
                              alpha=0.7,
                              label='Original Data',
                              ax=ax[0])
        ax2 = sns.lineplot(x='datetime',
                           y='function',
                           data=sheet,
                           linewidth=2,
                           label='Fitted Function',
                           ax=ax[0])
        ax1.set_title(name + ' / Methane Ratio')
        ax1.set_xlabel('Datetime')
        ax1.set_ylabel('Mixing Ratio [ppb]')
        ax1.set(xlim=date_limits)
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend()

        # panel 2: normalized residuals over time with the smoothed fit
        ax3 = sns.scatterplot(x='datetime',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Normalized Residuals',
                              ax=ax[1])
        ax4 = sns.lineplot(x='datetime',
                           y='residsmooth',
                           data=sheet,
                           linewidth=2,
                           label='Smoothed Residual Fit',
                           ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Normalized Residuals in ' + name)
        ax3.set_xlabel('Datetime')
        ax3.set_ylabel('Mixing Ratio [ppb]')
        ax3.set(xlim=date_limits)
        ax3.set(ylim=resid_limits)
        ax3.legend()

        # panel 3: normalized residuals by day of year
        sheet['DOY'] = [x.timetuple().tm_yday for x in sheet['datetime']]

        ax5 = sns.scatterplot(x='DOY',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Normalized Residuals',
                              ax=ax[2])
        ax5.set_title('Normalized Residuals by Julian Day')
        ax5.set_xlabel('Day of Year')
        ax5.set_ylabel('Mixing Ratio [ppb]')
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=resid_limits)
        ax5.legend()

        direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

    # plotting separate joint plot of the two normalized residual series
    sns.set(style="white")
    sns.despine()
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    # columns shared by both frames get the '_x' (ethane) / '_y' (acetylene)
    # suffixes; rows whose acetylene residual is -5 or lower are discarded
    combo = combo[combo['resid_y'] > -5]

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword: seaborn 0.12+ no longer accepts them
    # positionally, and the keyword form also works on older releases
    g = sns.jointplot(x=combo['resid_x'],
                      y=combo['resid_y'],
                      kind='reg',
                      color='#e65c00',
                      line_kws={
                          'label':
                          'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane MR [ppb]', 'Acetylene MR [ppb]', fontsize=12)
    g.fig.suptitle(
        'Correlation between Ethane and Acetylene Normalized Residuals')
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()

示例#8

0

显示文件

import seaborn as sns
import datetime as dt

# main script

# compound names; each one is combined with 'FIT.txt' to form a file name
compounds = [
    'ethane',
    'ethene',
    'propane',
    'propene',
    'i_pentane',
    'acetylene',
    'n_pentane',
    'i_butane',
    'n_butane',
    'hexane',
    'benzene',
    'toulene',
]

# directory holding the data files
root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'

# column names applied to every loaded file
header = ['yr', 'value', 'function', 'resid']

register_matplotlib_converters()

for cpd in compounds:
    filename = root + '\\' + cpd + 'FIT.txt'                                                # file ext
    data = readCsv(filename)
    data.columns = header                                                                   # reset column names

    data = data[data['value'] > 0.0]

    normResid = data['resid'].values / data['value'].values
    data.drop(['resid'], axis=1, inplace=True)
    data['resid'] = normResid

    dates = decToDatetime(data['yr'].values)                                                # call conv function
    data['datetime'] = dates                                                                # assign to DF
    data.drop('yr', axis=1, inplace=True)

    # trim a few extreme outliers
    values = data['value'].values                                                           # get the value col
    z = np.abs(stats.zscore(values))                                                        # get the z score