Example #1
0
def makeGLODAP(rawFilePath, rawFileName, tableName):
    """Clean the GLODAP bottle CSV and export it as a sorted CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw comma-separated data file.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    tuple
        (export_path, df): path of the written CSV and the cleaned DataFrame.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',', usecols=usecols)

    # Cast the date parts to int first (strips a trailing '.0'), then to str
    # so pd.to_datetime can assemble them into a single timestamp column.
    for part in ('year', 'month', 'day', 'hour', 'minute'):
        df[part] = df[part].astype('int').astype('str')
    df['second'] = '0'  # source has no seconds column; fix at zero
    # When to_datetime receives a DataFrame of component columns it assembles
    # the timestamp from them; a 'format' argument does not apply to this
    # mode and has been removed.
    df['time'] = pd.to_datetime(
        df[['year', 'month', 'day', 'hour', 'minute', 'second']])

    ip.renameCol(df, 'latitude', 'lat')
    ip.renameCol(df, 'longitude', 'lon')
    # rename measured variables to self-describing column names
    ip.renameCol(df, 'theta', 'theta_potential_temperature')
    ip.renameCol(df, 'sigma0', 'sigma0_potential_density')
    ip.renameCol(df, 'sigma1', 'sigma1_potential_density_ref_1000_dbar')
    ip.renameCol(df, 'sigma2', 'sigma2_potential_density_ref_2000_dbar')
    ip.renameCol(df, 'sigma3', 'sigma3_potential_density_ref_3000_dbar')
    ip.renameCol(df, 'sigma4', 'sigma4_potential_density_ref_4000_dbar')
    ip.renameCol(df, 'gamma', 'gamma_neutral_density')
    ip.renameCol(df, 'TAlk', 'TAlk_total_alkalinity')
    ip.renameCol(df, 'phts25p0', 'phts25p0_pH_25C_0dbar')
    ip.renameCol(df, 'phtsinsitutp', 'phtsinsitutp_pH_insitu')
    # (six duplicated 'latitude' -> 'lat' renames removed; done once above)

    # join the cruise-ID -> expocode lookup onto the data
    # NOTE(review): rawFileName_expocodes is a module-level global — confirm
    # it is defined wherever this function is imported from.
    expocodes = pd.read_csv(rawFilePath + rawFileName_expocodes,
                            sep='\t',
                            names=['cruise_ID', 'expocode'])
    df = pd.merge(df, expocodes, left_on='cruise', right_on='cruise_ID')
    # drop(columns=...) replaces the positional axis argument, which was
    # deprecated and removed in pandas 2.0.
    df = df.drop(columns='cruise_ID')
    ip.renameCol(df, 'expocode', 'cruise_expocode')

    df = ip.arrangeColumns(usecols_rearange, df)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path, df
Example #2
0
def makeHL2A_diel_metagenomics(rawFilePath, rawFileName, tableName):
    """Clean the HL2A diel metagenomics Excel sheet and export it as a CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw Excel workbook.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    str
        Path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    # 'sep' is not a read_excel argument (it belongs to read_csv) and raises
    # TypeError in modern pandas — removed.
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #3
0
def makeMesoscope_km1709(rawFilePath, rawFileName, tableName):
    """Clean the MESO-SCOPE KM1709 Excel sheet and export it as a sorted CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw Excel workbook.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    str
        Path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    df = pd.read_excel(path, 'data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.colDatatypes(df)
    df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%d')
    df['ID'] = None
    df = ip.removeDuplicates(df)
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df.to_csv(export_path, index=False)
    # sortByTimeLatLonDepth rewrites export_path in sorted order; the second
    # df.to_csv that followed it would clobber the sorted file with the
    # unsorted frame, so it has been removed (matches every sibling maker).
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #4
0
def makeSingleCellGenomes_Chisholm(rawFilePath, rawFileName, tableName):
    """Clean the Chisholm single-cell genomes Excel sheet and export a CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw Excel workbook.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    str
        Path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_excel(path, 'data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    # sortByTimeLatLonDepth rewrites export_path in sorted order; the second
    # df.to_csv that followed it would clobber the sorted file with the
    # unsorted frame, so it has been removed (matches every sibling maker).
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #5
0
def makeFlombaum(rawFilePath, rawFileName, tableName):
    """Clean the Flombaum picophytoplankton Excel sheet and export a CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw Excel workbook.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    str
        Path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    # 'sep' is not a read_excel argument (it belongs to read_csv) and raises
    # TypeError in modern pandas — removed.
    df = pd.read_excel(path, sheet_name='data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    # NOTE(review): abs() then mapTo180180 looks like a source-specific sign
    # fix (raw longitudes presumably stored as magnitudes) — confirm against
    # the raw data before touching.
    df['lon'] = df['lon'].abs()
    df.to_csv(export_path, index=False)
    ip.mapTo180180(export_path, 'lon')
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
Example #6
0
def makeGlobal_PicoPhytoPlankton(rawFilePath, rawFileName, tableName):
    """Clean the global picophytoplankton Excel sheet and export a sorted CSV.

    Parameters
    ----------
    rawFilePath : str
        Directory holding the raw file (must end with a path separator).
    rawFileName : str
        Name of the raw Excel workbook.
    tableName : str
        Destination table name; also used as the export file prefix.

    Returns
    -------
    str
        Path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    # 'sep' is not a read_excel argument (it belongs to read_csv) and raises
    # TypeError in modern pandas — removed.
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df['year'] = df['year'].astype('str')
    # zero-pad month/day so to_datetime can assemble a timestamp
    df['month'] = df['month'].astype('str').apply(lambda x: x.zfill(2))
    df['day'] = df['day'].astype('str').apply(lambda x: x.zfill(2))
    print(len(df))
    # drop rows whose day is a missing-value sentinel
    df = df[(df['day'] != '-9') & (df['day'] != '-1')]

    # expand two-digit / one-digit year codes present in the raw data
    df['year'] = df['year'].replace('10', '2010')
    df['year'] = df['year'].replace('11', '2011')
    df['year'] = df['year'].replace('6', '2006')
    # When to_datetime receives a DataFrame of component columns it assembles
    # the timestamp from them; a 'format' argument does not apply to this
    # mode and has been removed.
    df['time'] = pd.to_datetime(df[['year', 'month', 'day']])
    ip.renameCol(df, 'Lat', 'lat')
    ip.renameCol(df, 'Long', 'lon')
    ip.renameCol(df, 'Depth', 'depth')
    ip.renameCol(df, 'PromL', 'prochlorococcus_abundance')
    ip.renameCol(df, 'SynmL', 'synechococcus_abundance')
    ip.renameCol(df, 'PEukmL', 'picoeukaryote_abundance')
    ip.renameCol(df, 'pico_abund', 'picophytoplankton_abundance')
    ip.renameCol(df, 'picophyto [ug C/L]', 'picophytoplankton_biomass')
    ip.removeColumn(['year', 'day', 'month'], df)
    df = ip.reorderCol(df, [
        'time', 'lat', 'lon', 'depth', 'prochlorococcus_abundance',
        'synechococcus_abundance', 'picoeukaryote_abundance',
        'picophytoplankton_abundance', 'picophytoplankton_biomass'
    ])
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path