Example #1
def insertAMTCruiseTraj():
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    usecols = ['Cruise_name', 'time', 'lat', 'lon']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    os.chdir(rawFilePath)

    df = pd.read_csv(path, sep=',', usecols=usecols)
    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s.csv' % (exportBase, Cruise_name)

        print(Cruise_name)

        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()  # rows for this cruise only
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID
        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        cruise_df = ip.NaNtoNone(cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        cruise_df = cruise_df[['Cruise_ID', 'time', 'lat', 'lon']]
        cruise_df.to_csv(export_path, index=False)
        ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')

        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
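Aside: the unique()-then-filter loop above re-scans df once per cruise. A groupby split expresses the same iteration in a single pass; the following is a sketch of the idea, not a drop-in replacement.

def iterCruises(df):
    # df is a pandas DataFrame with a 'Cruise_name' column, as above;
    # .copy() avoids SettingWithCopyWarning when columns are added later
    for cruise_name, cruise_df in df.groupby('Cruise_name'):
        yield cruise_name, cruise_df.copy()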
Example #2
def insertSeaFlowCruiseTraj():
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    os.chdir(rawFilePath)
    sfl_cruise_list = glob.glob('*.sfl*')
    usecols_sfl = ['DATE', 'LAT', 'LON']
    for cruise in sfl_cruise_list:
        prefix = cruise[:-8] + '_traj'
        rawFileName = cruise
        path = rawFilePath + rawFileName
        exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
        export_path = '%s%s.csv' % (exportBase, prefix)
        print(cruise)
        Cruise_ID = iF.findID_CRUISE(cruise[:-8])
        df = pd.read_csv(path, sep='\t', usecols=usecols_sfl)
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
        df['Cruise_ID'] = Cruise_ID
        df.rename(columns={
            'DATE': 'time',
            'LAT': 'lat',
            'LON': 'lon'
        },
                  inplace=True)
        df = df[['Cruise_ID', 'time', 'lat', 'lon']]
        df = ip.removeMissings(['time', 'lat', 'lon'], df)
        df = ip.NaNtoNone(df)
        df = ip.colDatatypes(df)
        df = ip.convertYYYYMMDD(df)
        df = ip.removeDuplicates(df)
        df.to_csv(export_path, index=False)
        ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
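The loaders above lean on project-internal helpers from ip (the insert-prep module), whose implementations are not shown. The minimal sketches below are inferred purely from the call sites; they are assumptions about the intended behavior, not the project's actual code.

import pandas as pd

def removeMissings(cols, df):
    # drop rows where any key column (e.g. time/lat/lon) is missing
    return df.dropna(subset=cols)

def NaNtoNone(df):
    # SQL bulk inserts want NULL rather than NaN
    return df.where(pd.notnull(df), None)

def removeDuplicates(df):
    # keep the first occurrence of each fully duplicated row
    return df.drop_duplicates(keep='first')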
Example #3
def makeGLODAP(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',', usecols=usecols)

    # cast to int to strip the trailing '.0', then to str for datetime assembly
    df['year'] = df['year'].astype('int').astype('str')
    df['month'] = df['month'].astype('int').astype('str')
    df['day'] = df['day'].astype('int').astype('str')
    df['hour'] = df['hour'].astype('int').astype('str')
    df['minute'] = df['minute'].astype('int').astype('str')
    df['second'] = '0'
    # construct datetime from the component columns (no format string needed)
    df['time'] = pd.to_datetime(
        df[['year', 'month', 'day', 'hour', 'minute', 'second']])

    ip.renameCol(df, 'latitude', 'lat')
    ip.renameCol(df, 'longitude', 'lon')
    # renaming Variables
    ip.renameCol(df, 'theta', 'theta_potential_temperature')
    ip.renameCol(df, 'sigma0', 'sigma0_potential_density')
    ip.renameCol(df, 'sigma1', 'sigma1_potential_density_ref_1000_dbar')
    ip.renameCol(df, 'sigma2', 'sigma2_potential_density_ref_2000_dbar')
    ip.renameCol(df, 'sigma3', 'sigma3_potential_density_ref_3000_dbar')
    ip.renameCol(df, 'sigma4', 'sigma4_potential_density_ref_4000_dbar')

    ip.renameCol(df, 'gamma', 'gamma_neutral_density')
    ip.renameCol(df, 'TAlk', 'TAlk_total_alkalinity')
    ip.renameCol(df, 'phts25p0', 'phts25p0_pH_25C_0dbar')
    ip.renameCol(df, 'phtsinsitutp', 'phtsinsitutp_pH_insitu')

    #import cruise data to ID file and do join
    expocodes = pd.read_csv(rawFilePath + rawFileName_expocodes,
                            sep='\t',
                            names=['cruise_ID', 'expocode'])
    df = pd.merge(df, expocodes, left_on='cruise', right_on='cruise_ID')
    df = df.drop(columns='cruise_ID')
    ip.renameCol(df, 'expocode', 'cruise_expocode')

    df = ip.arrangeColumns(usecols_rearange, df)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path, df
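Note on the datetime assembly in makeGLODAP: pd.to_datetime accepts a DataFrame of component columns named year/month/day/hour/minute/second directly, which is why no format string is needed. A tiny demonstration with synthetic values:

import pandas as pd

parts = pd.DataFrame({'year': ['2002'], 'month': ['7'], 'day': ['15'],
                      'hour': ['6'], 'minute': ['30'], 'second': ['0']})
print(pd.to_datetime(parts))  # 0   2002-07-15 06:30:00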
Example #4
def makeWOA_climatology(rawFilePath, tableName):
    for month in month_list:  # e.g. 1 = January
        print('Month: ' + str(month))
        df = merge_WOA_variables(month)
        df = ip.removeColumn(['I_gp', 'C_gp'], df)
        df = ip.removeMissings(['lat', 'lon', 'depth'], df)
        df = ip.NaNtoNone(df)
        df = ip.addIDcol(df)
        df.sort_values(['lat', 'lon', 'depth'], ascending=[True, True, True], inplace=True)

        export_path = exportBase + tableName + '_' + str(month) + '.csv'  # exportBase defined at module scope
        df.to_csv(export_path, index=False)
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName)
Example #5
def makeHL2A_diel_metagenomics(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #6
def makeSeaFlow(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',')
    df = df[df['flag'] == 0]  # keep only rows with flag == 0 (assumed QC-pass)

    # derive per-population abundance, diameter, carbon content, and biomass;
    # abundance uses only the 50th-quantile rows
    for pop in ['prochloro', 'synecho', 'croco', 'picoeuk', 'unknown']:
        df[pop + '_abundance'] = np.where((df['pop'] == pop) & (df['quantile'] == 50), df['abundance'], np.nan)
        df[pop + '_diameter'] = np.where(df['pop'] == pop, df['diam_mid'], np.nan)
        df[pop + '_carbon_content'] = np.where(df['pop'] == pop, df['Qc_mid'], np.nan)
        df[pop + '_biomass'] = df[pop + '_abundance'] * df[pop + '_carbon_content']

    # totals exclude the calibration beads
    df['total_abundance'] = np.where((df['pop'] != 'beads') & (df['quantile'] == 50), df['abundance'], np.nan)
    df['total_carbon_content'] = np.where(df['pop'] != 'beads', df['Qc_mid'], np.nan)
    df['total_biomass'] = df['total_abundance'] * df['total_carbon_content']

    df = ip.arrangeColumns([
        'time', 'lat', 'lon', 'depth',
        'prochloro_abundance', 'prochloro_diameter', 'prochloro_carbon_content', 'prochloro_biomass',
        'synecho_abundance', 'synecho_diameter', 'synecho_carbon_content', 'synecho_biomass',
        'croco_abundance', 'croco_diameter', 'croco_carbon_content', 'croco_biomass',
        'picoeuk_abundance', 'picoeuk_diameter', 'picoeuk_carbon_content', 'picoeuk_biomass',
        'unknown_abundance', 'unknown_diameter', 'unknown_carbon_content', 'unknown_biomass',
        'total_biomass', 'par'
    ], df)

    df = ip.removeMissings(['time', 'lat', 'lon'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path
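The per-population np.where columns above amount to a hand-rolled long-to-wide pivot. An equivalent sketch of the abundance part using pivot_table, assuming the same SeaFlow column names; this is an alternative formulation, not the function's actual code.

import pandas as pd

def pivotAbundance(df):
    # one abundance column per population, using only the 50th-quantile rows
    mid = df[df['quantile'] == 50]
    wide = mid.pivot_table(index=['time', 'lat', 'lon'],
                           columns='pop', values='abundance')
    return wide.add_suffix('_abundance').reset_index()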
Example #7
def makeSingleCellGenomes_Chisholm(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_excel(path, 'data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #8
def makeCTD_Chisholm(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    df = pd.read_excel(path, 'data')

    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.colDatatypes(df)
    df = ip.NaNtoNone(df)
    df['ID'] = None

    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    print(df.dtypes)

    # write the export first, then sort it (argument order follows the helper's name)
    df.to_csv(export_path, index=False)
    ip.sortByDepthLatLon(df, export_path, 'depth', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path
Example #9
def makeFlombaum(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_excel(path, sheet_name='data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df['lon'] = df['lon'].abs()
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    # remap longitudes on the exported file last, so the sort cannot clobber it
    ip.mapTo180180(export_path, 'lon')
    print('export path: ', export_path)
    return export_path
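ip.mapTo180180's implementation is not shown; it presumably wraps the exported longitudes into the -180..180 convention. The arithmetic below is a hypothetical equivalent of that mapping (the real helper operates on the CSV file rather than a Series).

import pandas as pd

def mapTo180180(lon):
    # wrap 0..360 (or any) longitudes into the -180..180 convention
    return ((lon + 180) % 360) - 180

print(mapTo180180(pd.Series([10.0, 200.0, 350.0])).tolist())  # [10.0, -160.0, -10.0]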
Example #10
def insertSeaFlowCruiseSalinity():
    server = 'Rainier'
    tableName = 'tblCruise_Salinity'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    os.chdir(rawFilePath)
    sfl_cruise_list = glob.glob('*.sfl*')
    usecols_sfl = ['DATE', 'LAT', 'LON', 'SALINITY']
    for cruise in sfl_cruise_list:
        prefix = cruise[:-8] + '_salinity'
        rawFileName = cruise
        path = rawFilePath + rawFileName
        exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
        export_path = '%s%s.csv' % (exportBase, prefix)
        print(cruise)
        Cruise_ID = iF.findID_CRUISE(cruise[:-8])
        df = pd.read_csv(cruise, sep='\t', usecols=usecols_sfl)
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
        df['DEPTH'] = 5.0
        df['Cruise_ID'] = Cruise_ID
        df.rename(columns={
            'DATE': 'time',
            'LAT': 'lat',
            'LON': 'lon',
            'DEPTH': 'depth',
            'SALINITY': 'salinity'
        },
                  inplace=True)
        df = df[['Cruise_ID', 'time', 'lat', 'lon', 'depth', 'salinity']]
        df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
        df = df[pd.to_numeric(df['salinity'], errors='coerce').notnull()]
        df = ip.NaNtoNone(df)
        df = ip.colDatatypes(df)
        df = ip.convertYYYYMMDD(df)
        df = ip.removeDuplicates(df)
        print(df.head())
        if df.empty:
            print(cruise +
                  ' had no salinity values. Not inserted into database')
        else:
            df.to_csv(export_path, index=False)
            ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)
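The pd.to_numeric(errors='coerce') filter above is a compact way to drop rows whose salinity cannot be parsed as a number. A demonstration with synthetic values:

import pandas as pd

s = pd.DataFrame({'salinity': ['35.1', 'bad', None, '34.9']})
print(s[pd.to_numeric(s['salinity'], errors='coerce').notnull()])
# keeps only the '35.1' and '34.9' rows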
Example #11
def insertAMTCruiseTemperature():
    server = 'Rainier'
    tableName = 'tblCruise_Temperature'
    usecols = ['Cruise_name', 'time', 'lat', 'lon', 'temp', 'temp_flag']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    os.chdir(rawFilePath)

    df = pd.read_csv(path, sep=',', usecols=usecols)
    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s%s.csv' % (exportBase, Cruise_name, tableName)

        print(Cruise_name)

        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()  # rows for this cruise only
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID
        # drop rows whose temperature flag marks bad data
        cruise_df = cruise_df[~cruise_df['temp_flag'].isin(['N', 'S', 'M', 'L'])]
        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        cruise_df = ip.renameCol(cruise_df, 'temp', 'temperature')
        cruise_df = cruise_df[[
            'Cruise_ID', 'time', 'lat', 'lon', 'temperature'
        ]]
        cruise_df = cruise_df.dropna(subset=['temperature'])
        cruise_df = ip.NaNtoNone(cruise_df)

        if cruise_df.empty:
            print(Cruise_name +
                  ' had no temperature values. Not inserted into database')
        else:
            cruise_df.to_csv(export_path, index=False)
            ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)
Example #12
def makeGlobal_PicoPhytoPlankton(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df['year'] = df['year'].astype('str')
    df['month'] = ((df['month'].astype('str')).apply(lambda x: x.zfill(2)))
    df['day'] = ((df['day'].astype('str')).apply(lambda x: x.zfill(2)))
    print(len(df))
    df = df[(df['day'] != '-9') & (df['day'] != '-1')]  # drop missing-day sentinel rows

    # repair truncated years, then assemble dates from the component columns
    df['year'] = df['year'].replace({'10': '2010', '11': '2011', '6': '2006'})
    df['time'] = pd.to_datetime(df[['year', 'month', 'day']])
    ip.renameCol(df, 'Lat', 'lat')
    ip.renameCol(df, 'Long', 'lon')
    ip.renameCol(df, 'Depth', 'depth')
    ip.renameCol(df, 'PromL', 'prochlorococcus_abundance')
    ip.renameCol(df, 'SynmL', 'synechococcus_abundance')
    ip.renameCol(df, 'PEukmL', 'picoeukaryote_abundance')
    ip.renameCol(df, 'pico_abund', 'picophytoplankton_abundance')
    ip.renameCol(df, 'picophyto [ug C/L]', 'picophytoplankton_biomass')
    df = ip.removeColumn(['year', 'day', 'month'], df)
    df = ip.reorderCol(df, [
        'time', 'lat', 'lon', 'depth', 'prochlorococcus_abundance',
        'synechococcus_abundance', 'picoeukaryote_abundance',
        'picophytoplankton_abundance', 'picophytoplankton_biomass'
    ])
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #13
DB = 'Opedia'
Dataset_Name = 'tblBottle_Chisholm'
Dataset_Long_Name = dataset_metadata.iloc[0]['dataset_long_name']
Data_Source = dataset_metadata.iloc[0]['dataset_source']
Distributor = 'https://chisholmlab.mit.edu/'
Description = dataset_metadata.iloc[0]['dataset_description']
Climatology = 'NULL'
# Variables =  ', '.join(list(vars_metadata['var_short_name']))
Variables = 'More than 50 variables: details can be found in tblVariables'
reference_list = (dataset_metadata.iloc[0]['dataset_references']).split(",")

DB_list = [DB] * len(vars_metadata)
Dataset_Name_list = [Dataset_Name] * len(vars_metadata)
short_name_list = list(vars_metadata['var_short_name'])
long_name_list = list(vars_metadata['var_long_name'])
unit_list = list(ip.NaNtoNone(vars_metadata['var_unit']))

spatial_res_list = ['1'] * len(vars_metadata)  # '1' = Irregular
temporal_res_list = ['1'] * len(vars_metadata)  # '1' = Irregular

keyword_list = list(vars_metadata['var_keywords'])
comment_list = list(ip.NaNtoNone(vars_metadata['var_comment']))
minMaxDate = cF.findMinMaxDate(Dataset_Name)
spatialBounds = cF.findSpatialBounds(Dataset_Name)
Temporal_Coverage_Begin_list = [minMaxDate['minDate']] * len(vars_metadata)
Temporal_Coverage_End_list = [minMaxDate['maxDate']] * len(vars_metadata)
Lat_Coverage_Begin_list = [spatialBounds['minLat']] * len(vars_metadata)
Lat_Coverage_End_list = [spatialBounds['maxLat']] * len(vars_metadata)
Lon_Coverage_Begin_list = [spatialBounds['minLon']] * len(vars_metadata)
Lon_Coverage_End_list = [spatialBounds['maxLon']] * len(vars_metadata)
Example #14
rawFilePath = '/media/nrhagen/Drobo/OpediaVault/model/darwin_3day/'
netcdf_list = glob.glob(rawFilePath + '*.nc')
exportBase = cfgv.opedia_proj + 'db/dbInsert/export_temp/'
prefix = tableName
export_path = '%s%s.csv' % (exportBase, prefix)
############################
############################
path = sys.argv[1]

if os.path.isfile(exportBase + os.path.basename(path)[:-3] + '_DONE.txt'):  # a '_DONE.txt' catalog marker means this file was already processed
    sys.exit(0)
else:
    xdf = xr.open_dataset(path)
    df = xdf.to_dataframe()
    df.reset_index(inplace=True)  # promote the netCDF dimensions to ordinary columns
    df = ip.renameCol(df, 'lat_c', 'lat')
    df = ip.renameCol(df, 'lon_c', 'lon')
    df = ip.renameCol(df, 'dep_c', 'depth')
    df = ip.convertcolDatatype(df,['FeT', 'PO4', 'DIN', 'SiO2', 'O2'])
    # df = ip.removeMissings(['time','lat', 'lon', 'depth'], df)
    df = ip.arrangeColumns(['time','lat', 'lon','depth', 'FeT', 'PO4', 'DIN', 'SiO2', 'O2'], df)
    df = ip.NaNtoNone(df)
    df = ip.addIDcol(df)
    df = ip.colDatatypes(df)
    df.sort_values(['time', 'lat', 'lon', 'depth'], ascending=[True, True, True, True], inplace=True)
    df.to_csv(exportBase + os.path.basename(path)[:-3] + '.csv', mode='a', chunksize=1000000, index=False)

    # write an empty '_DONE.txt' marker to record that this file was processed
    open(exportBase + os.path.basename(path)[:-3] + '_DONE.txt', 'w').close()
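The '_DONE.txt' marker makes the per-file script above idempotent, so a driver can re-run it over the whole directory and only unprocessed files do any work. A hypothetical driver sketch (the script filename is assumed, not given in the source):

import glob
import subprocess
import sys

rawFilePath = '/media/nrhagen/Drobo/OpediaVault/model/darwin_3day/'
for nc in sorted(glob.glob(rawFilePath + '*.nc')):
    # 'process_darwin_file.py' is a placeholder name for the script above
    subprocess.run([sys.executable, 'process_darwin_file.py', nc], check=True)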