Exemplo n.º 1
0
    def test_readme_output_file_tester(self):
        input_template=\
"""
Date ran:{run_date}
   
Process: Resample Input Data to {inc_amt} {label_name} Resolution
   
Input filename: {file_name}
Years: {years}
Depths: {depths}
Samples: {samples}
"""
        f=os.path.join('csv_files','input', 'small.csv')
        run_date='2017-08-03'
        inc_amt=1
        file=['files']
        label_name='year'
        stat_header=['mean']

        result=readme_output_file(input_template,DataClass(f),run_date, inc_amt, label_name, stat_header,file)
        expected_result=\
"""
Date ran:2017-08-03
   
Process: Resample Input Data to 1 year Resolution
   
Input filename: small.csv
Years: Dat210617, Dat011216V2
Depths: depth (m we) , depth (m abs)
Samples: Cond (+ALU-S/cm), Na (ppb), Ca (ppb), Dust (part/ml), NH4 (ppb), NO3 (ppb)
"""
        self.assertEqual(result,expected_result)
Exemplo n.º 2
0
def _open_with_default_viewer(path):
    """Open ``path`` with the operating system's default application."""
    import sys  # local import so this block does not depend on file-level imports

    try:
        # os.startfile only exists on Windows; elsewhere the attribute lookup
        # itself raises AttributeError.
        os.startfile(path)
    except (AttributeError, OSError):
        # macOS ships `open`; other POSIX desktops conventionally use xdg-open.
        opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
        subprocess.call([opener, path])


def plot_samples_by_year(f: str, interval: List = None):
    """Write one PDF of sample plots per year column of ``f`` and open it.

    The PDFs are created in an ``Output_Files`` folder under ``dc.dirname``
    of the ``DataClass`` built from ``f``. Each sample header is plotted
    through ``plot_samples`` into the PDF of the current year header.

    :param f: path of the input CSV file
    :param interval: optional two-element ``[start, end]`` range; when given
        it is forwarded to ``plot_samples`` and encoded in the PDF file name.
        ``None`` or an empty list means no interval.
    """
    # A fresh list per call instead of a shared mutable default argument.
    interval = [] if interval is None else interval

    dc = DataClass(f)
    folder = os.path.join(dc.dirname, 'Output_Files')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)

    for y in dc.year_headers:
        if interval:
            pdf_name = ('plot_%s_%.0f-%.0f.pdf' %
                        (y.label, interval[0], interval[1]))
        else:
            pdf_name = 'plot_%s.pdf' % (y.label)
        pdf_file = os.path.join(folder, pdf_name)

        with PdfPages(pdf_file) as pdf:
            for i, sample in enumerate(dc.sample_headers):
                plot_samples(i, dc, sample, y, pdf, interval)

        # Only open the file once the PdfPages context has closed it.
        _open_with_default_viewer(pdf_file)
import unittest
from pandas import DataFrame
import os
from pandas.util.testing import assert_frame_equal

from climatechange.headers import process_header_str
from climatechange.resample import resample,depth_columns, find_match, resample_by,\
    by_years, by_depths, create_range_for_depths, create_range_by_year

from climatechange.common_functions import DataClass, clean_data,\
    load_csv, to_csv
    
    
# Module-level fixtures. All paths are relative, so they resolve against the
# current working directory when this module is imported.
# NOTE: DataClass(...) and clean_data(load_csv(...)) below run at import time.
small_file = os.path.join('csv_files','input', 'small.csv')
dc = DataClass(small_file)
# Files under csv_files/output -- presumably the expected results that the
# resampling output is compared against (TODO confirm against the tests).
output_small_file = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_1_year_resolution_for_mean.csv')
output_small_file2 = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_2_year_resolution_for_mean.csv')
output_small_filem = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_1_year_resolution_for_max_std.csv')
output_small_file_depth = os.path.join('csv_files','output','small_resample_by_0.001_depth_abs_(m)_mean.csv')
output_small_file_01 = os.path.join('csv_files','output','small_resample_by_0.01_depth_abs_(m)_max_count.csv')
output_small_file_stat = os.path.join('csv_files','output','small_resample_by_0.01_depth_abs_(m)_stat.csv')
output_small_year_stat = os.path.join('csv_files','output','small_resample_by_1_year_stat.csv')

# Loaded and cleaned once at import; note this file sits directly in
# csv_files, not in csv_files/input.
input_test_zeros_and_numbers = clean_data(load_csv(os.path.join('csv_files', 'input_test_zeros_and_numbers.csv')))



# Paths used with resample_by -- judging by the names, f_HR is the
# higher-resolution input, f_LR the lower-resolution one and output_by_LR the
# expected result (TODO confirm).
output_by_LR = os.path.join('csv_files','output','resample_by_LR_output.csv')
f_HR = os.path.join('csv_files','input', 'test_input_dd_2.csv')
f_LR = os.path.join('csv_files','input', 'test_input_dd_1.csv')
def resample_data(directory: str,
                  by: str,
                  depth_age_file: str,
                  prefix='KCC',
                  depth='depth (m abs)',
                  output=True):
    '''
    Resample all raw LA-ICP-MS input files found below ``directory``.

    Every sub-folder of ``directory`` whose name starts with ``prefix`` is
    searched for folders starting with ``Input``; the input files inside are
    classified by name as medium resolution (``InputFile_1``, or ``Input*``
    ending in ``1``/``MR`` with or without ``.txt``) or low resolution
    (``InputFile_2``, or ``Input*`` ending in ``2``/``LR``). Each laser file
    loaded from them is processed with ``depth_age_file`` and resampled onto
    the data frame of ``by``.

    :param directory: root directory holding the core folders
    :param by: path of the file handed to ``DataClass`` to resample by
    :param depth_age_file: forwarded to ``process_laser_data``
    :param prefix: name prefix selecting the core folders
    :param depth: depth column name forwarded to ``resample_laser_by``
    :param output: when truthy, write CSV and README files for both
        resolutions (and the core-information CSV if it is not there yet)
    :return: tuple ``(medium_resolution_frame, low_resolution_frame)``
    '''
    by = DataClass(by)
    core_info = DataFrame()
    medium_res = DataFrame()
    low_res = DataFrame()

    for core_dir in os.listdir(directory):
        if not core_dir.startswith(prefix):
            continue
        core_path = os.path.join(directory, core_dir)

        for sub_dir in sorted(os.listdir(core_path)):
            if not sub_dir.startswith('Input'):
                continue
            input_path = os.path.join(core_path, sub_dir)

            for entry in sorted(os.listdir(input_path)):
                is_input = entry.startswith('Input')
                is_medium = entry.startswith('InputFile_1') or (
                    is_input
                    and entry.endswith(('1', 'MR', '1.txt', 'MR.txt')))
                is_low = entry.startswith('InputFile_2') or (
                    is_input
                    and entry.endswith(('2', 'LR', '2.txt', 'LR.txt')))

                # Medium takes precedence when a name matches both patterns.
                if is_medium:
                    for laser in load_input(os.path.join(input_path, entry)):
                        # Core information is collected from the
                        # medium-resolution files only.
                        core_info = core_info.append(laser.info,
                                                     ignore_index=True)
                        processed = process_laser_data(laser, depth_age_file)
                        medium_res = medium_res.append(
                            resample_laser_by(processed, by.df, depth))
                elif is_low:
                    for laser in load_input(os.path.join(input_path, entry)):
                        processed = process_laser_data(laser, depth_age_file)
                        low_res = low_res.append(
                            resample_laser_by(processed, by.df, depth))

    if not output:
        return medium_res, low_res

    tag = 'Raw_Resampled_by_{}'.format(by.base)
    to_csv(directory, medium_res, 'LA-ICP-MS_{}_MR.csv'.format(tag))
    to_csv(directory, low_res, 'LA-ICP-MS_{}_LR.csv'.format(tag))

    output_dir = os.path.join(directory, 'Output_Files')
    info_file = 'full_core_information.csv'
    # Never overwrite an existing core-information file.
    if not os.path.isfile(os.path.join(output_dir, info_file)):
        to_csv(directory, core_info, info_file, False)

    readme_medium = readme_laser_file(laser_template, directory, prefix,
                                      depth_age_file, FrameClass(medium_res),
                                      'Medium', str(datetime.date.today()),
                                      info_file, 'LA-ICP-MS_raw_MR.csv', tag)
    readme_low = readme_laser_file(laser_template, directory, prefix,
                                   depth_age_file, FrameClass(low_res),
                                   'Low', str(datetime.date.today()),
                                   info_file, 'LA-ICP-MS_raw_LR.csv', tag)

    write_readmefile_to_txtfile(
        readme_medium,
        os.path.join(output_dir,
                     '00README_{}_Medium_Resolution.txt'.format(tag)))
    write_readmefile_to_txtfile(
        readme_low,
        os.path.join(output_dir,
                     '00README_{}_Low_Resolution.txt'.format(tag)))

    return medium_res, low_res
def resample(by: str,
             f: str,
             stat: str = ['mean'],
             inc_amt: int = 1,
             by_name: str = None,
             output=True):
    '''
    Resample the samples of a CSV file by years or by depths.

    Example invocation (from the project's command line):

    $ PYTHONPATH=. python climatechange/process_data.py -year_name ../test/csv_files/small.csv

    For every selected header the data is resampled with ``by_years`` or
    ``by_depths``. When ``output`` is truthy the result is written with
    ``to_csv`` to ``dc.dirname`` and a README text file is (re)written there.

    :param by: ``'year'``/``'Year'``/``'y'``/``'Y'`` to resample by years,
        ``'depth'``/``'Depth'``/``'d'``/``'D'`` to resample by depths
    :param f: This is a CSV file
    :param stat: a statistic name or a list of names (default ``['mean']``);
        a falsy value yields the ``..._stats.csv`` file name. The default
        list is not modified by this function.
    :param inc_amt: increment forwarded to ``by_years``/``by_depths``
    :param by_name: optional header string parsed with
        ``process_header_str``; when omitted, all year (or depth) headers of
        the file are used
    :param output: when truthy, write the CSV and README files
    :return: list with one resampled result per header
    :raises ValueError: if ``by`` is not one of the accepted spellings
    '''
    # Validate first: an unknown `by` used to leave `headers` unbound and
    # failed later with UnboundLocalError, after the input was already loaded.
    if by in ('year', 'Year', 'y', 'Y'):
        by_year = True
    elif by in ('depth', 'Depth', 'd', 'D'):
        by_year = False
    else:
        raise ValueError(
            "unknown resample mode {!r}; expected 'year' or 'depth'".format(
                by))

    logging.info("Resampling %s by %s", f, by)
    dc = DataClass(f)
    if by_name:
        headers = process_header_str(by_name)
    elif by_year:
        headers = dc.year_headers
    else:
        headers = dc.depth_headers

    # The file-name suffix depends only on `stat`, so compute it once.
    if not stat:
        stat_label = 'stats'
    elif isinstance(stat, str):
        stat_label = stat
    else:
        stat_label = '_'.join(stat)

    all_files = []
    dfs = []
    for h in headers:
        if by_year:
            df = by_years(dc, h, inc_amt, stat)
        else:
            df = by_depths(dc, h, inc_amt, stat)
        dfs.append(df)

        file = '{}_resample_by_{}_{}_{}.csv'.format(
            dc.base, inc_amt, h.label, stat_label)
        all_files.append(file)

        if output:
            to_csv(dc.dirname, df, file)

            # The README lists every file produced so far, so it is rebuilt
            # for each header.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()), inc_amt,
                                        by, stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_{}_{}_resolution.txt'.format(
                        h.label, inc_amt, by)))

    return dfs
def resample_by(filename: str,
                resample_by: str,
                stat: List[str] = None,
                depth: str = None,
                output=True):
    '''
    Resample the samples of one file onto the intervals of another file.

    For each header shared by both files, the values of that column in
    ``resample_by`` (restricted to the range covered by ``filename``) are
    used as interval boundaries. For every half-open interval
    ``[boundary[i], boundary[i + 1])`` the ``describe()`` statistics of each
    sample column of ``filename`` are collected.

    :param filename: CSV file whose samples are resampled
    :param resample_by: CSV file that supplies the interval boundaries
    :param stat: statistic name or list of names to keep from ``describe()``;
        when falsy, all statistics are kept
    :param depth: optional header string parsed with ``process_header_str``;
        when omitted the headers come from ``find_match`` on both files
    :param output: when truthy, write one CSV and one README per header to
        ``dc.dirname``
    :return: list with one statistics DataFrame per header, indexed by the
        lower interval boundaries; ``[DataFrame()]`` (a single empty frame)
        as soon as a header has no boundary inside the data range
    '''
    dc = DataClass(filename)
    dc_by = DataClass(resample_by)
    if depth:
        headers = process_header_str(depth)
    else:
        headers = find_match(dc, dc_by)

    headers_by = []
    all_files = []
    for h in headers:
        hr = dc.sample_df.set_index(dc.df[h.name])
        lr = dc_by.sample_df.set_index(dc_by.df[h.name])

        # Keep only the boundaries that lie inside the range of the data.
        lr = lr[(lr.index >= min(hr.index)) & (lr.index <= max(hr.index))]
        if lr.empty:
            # Same early-exit value the per-sample check used to return.
            return [DataFrame()]

        # The output name does not depend on the sample, so build it once.
        if stat:
            stat_label = stat if isinstance(stat, str) else '_'.join(stat)
            file = '{}_resampled_by_{}_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label, stat_label)
        else:
            file = '{}_resample_by_{}_{}.csv'.format(
                dc.base, dc_by.base, h.label)

        stat_dict = []
        for s in dc.sample_headers:
            df = DataFrame()
            for lower, upper in zip(lr.index[:-1], lr.index[1:]):
                idx = hr[(hr.index >= lower) & (hr.index < upper)]
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # any replacement must keep the column order it produced.
                df = df.append(idx[s.name].describe(), ignore_index=True)

            if stat:
                df = df[stat]
                try:
                    df.columns = [s.label + '_' + col for col in df]
                except TypeError:
                    # A single statistic name selects a Series: iterating it
                    # yields numbers, so the concatenation above fails and
                    # the Series itself is renamed instead.
                    df.name = s.label + '_' + df.name
            else:
                df.columns = [s.label + '_' + col for col in df]
            stat_dict.append(df)

        all_files.append(file)

        stat_df = pandas.concat(stat_dict, axis=1)
        # One row per interval, labelled by the interval's lower boundary.
        stat_df = stat_df.set_index([lr.index[:-1]])
        stat_df.index.name = h.label

        if output:
            to_csv(dc.dirname, stat_df, file)

            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()), dc_by.base,
                                        'depth', stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname, '00README_resample_{}_by_{}.txt'.format(
                        h.label, dc_by.base)))

        # Collect inside the loop so every header's result is returned, and
        # an empty header list returns [] instead of raising.
        headers_by.append(stat_df)

    return headers_by