def test_readme_output_file_tester(self):
    # Checks that readme_output_file fills the README template placeholders
    # (run date, increment, label, file name, years, depths, samples) using
    # the headers DataClass reads from the small.csv fixture.
    input_template=\
    """ Date ran:{run_date} Process: Resample Input Data to {inc_amt} {label_name} Resolution Input filename: {file_name} Years: {years} Depths: {depths} Samples: {samples} """
    # Fixture path is relative, so the test presumably has to run from the
    # test directory -- TODO confirm.
    f=os.path.join('csv_files','input', 'small.csv')
    run_date='2017-08-03'
    inc_amt=1
    # Output file list handed through to readme_output_file.
    file=['files']
    label_name='year'
    stat_header=['mean']
    result=readme_output_file(input_template,DataClass(f),run_date, inc_amt, label_name, stat_header,file)
    # The year, depth and sample names below are the values expected for
    # small.csv.
    expected_result=\
    """ Date ran:2017-08-03 Process: Resample Input Data to 1 year Resolution Input filename: small.csv Years: Dat210617, Dat011216V2 Depths: depth (m we) , depth (m abs) Samples: Cond (+ALU-S/cm), Na (ppb), Ca (ppb), Dust (part/ml), NH4 (ppb), NO3 (ppb) """
    self.assertEqual(result,expected_result)
def plot_samples_by_year(f: str, interval: List = None):
    """Write one PDF of sample plots per year header of ``f`` and open it.

    The PDFs are written to an ``Output_Files`` folder inside the directory
    reported by ``DataClass(f).dirname``. Each PDF gets one ``plot_samples``
    call per sample header.

    :param f: path of the input file handed to ``DataClass``.
    :param interval: optional two-element sequence; when non-empty its first
        two values are embedded in the PDF file name and the sequence is
        forwarded to ``plot_samples``. ``None`` (the default) and an empty
        sequence both mean "no interval".
    """
    # A fresh list per call instead of a shared mutable default; downstream
    # code still receives a list, exactly as before.
    interval = [] if interval is None else interval

    dc = DataClass(f)
    folder = os.path.join(dc.dirname, 'Output_Files')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder, exist_ok=True)

    for y in dc.year_headers:
        if interval:
            pdf_name = 'plot_%s_%.0f-%.0f.pdf' % (
                y.label, interval[0], interval[1])
        else:
            pdf_name = 'plot_%s.pdf' % (y.label)
        pdf_file = os.path.join(folder, pdf_name)

        with PdfPages(pdf_file) as pdf:
            for i, sample in enumerate(dc.sample_headers):
                plot_samples(i, dc, sample, y, pdf, interval)

        try:
            # os.startfile only exists on Windows; elsewhere the attribute
            # lookup itself raises AttributeError.
            os.startfile(pdf_file)
        except (AttributeError, OSError):
            # Fall back to the `open` command. Only the expected failures are
            # caught so KeyboardInterrupt/SystemExit are no longer swallowed.
            subprocess.call(['open', pdf_file])
import unittest
from pandas import DataFrame
import os
# NOTE(review): pandas.util.testing is deprecated in favour of pandas.testing
# and is no longer available in recent pandas releases -- confirm the pinned
# pandas version before upgrading.
from pandas.util.testing import assert_frame_equal
from climatechange.headers import process_header_str
from climatechange.resample import resample,depth_columns, find_match, resample_by,\
    by_years, by_depths, create_range_for_depths, create_range_by_year
from climatechange.common_functions import DataClass, clean_data,\
    load_csv, to_csv
# dc = DataClass(os.path.join('csv_files','input', 'small.csv'))

# Shared fixtures. All paths are relative, and `dc` and
# `input_test_zeros_and_numbers` are loaded at import time, so importing this
# module already requires the csv_files directory to be reachable from the
# current working directory.
small_file = os.path.join('csv_files','input', 'small.csv')
dc = DataClass(small_file)

# Paths under csv_files/output -- presumably the expected results the
# resampling tests compare against (TODO confirm against the test bodies).
output_small_file = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_1_year_resolution_for_mean.csv')
output_small_file2 = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_2_year_resolution_for_mean.csv')
output_small_filem = os.path.join('csv_files','output','Year_Dat210617_(CE)_resampled_by_1_year_resolution_for_max_std.csv')
output_small_file_depth = os.path.join('csv_files','output','small_resample_by_0.001_depth_abs_(m)_mean.csv')
output_small_file_01 = os.path.join('csv_files','output','small_resample_by_0.01_depth_abs_(m)_max_count.csv')
output_small_file_stat = os.path.join('csv_files','output','small_resample_by_0.01_depth_abs_(m)_stat.csv')
output_small_year_stat = os.path.join('csv_files','output','small_resample_by_1_year_stat.csv')

# Loaded and cleaned once at import time.
input_test_zeros_and_numbers = clean_data(load_csv(os.path.join('csv_files', 'input_test_zeros_and_numbers.csv')))
output_by_LR = os.path.join('csv_files','output','resample_by_LR_output.csv')

# Input pair for the resample-by-another-file tests; judging by the names,
# f_HR is the higher-resolution file and f_LR the lower-resolution one.
f_HR = os.path.join('csv_files','input', 'test_input_dd_2.csv')
f_LR = os.path.join('csv_files','input', 'test_input_dd_1.csv')
def resample_data(directory: str, by: str, depth_age_file: str, prefix='KCC', depth='depth (m abs)', output=True):
    '''
    Compile the raw LA-ICP-MS data found under ``directory`` and resample it
    onto the rows of the ``by`` file.

    Every sub-folder of ``directory`` whose name starts with ``prefix`` is
    scanned for folders starting with ``Input``; the input files inside are
    split into a medium-resolution set (names ending in 1 / MR) and a
    low-resolution set (names ending in 2 / LR). Each laser run loaded from
    those files is processed with ``depth_age_file`` and resampled against
    ``DataClass(by).df`` on the ``depth`` column.

    :param directory: root folder containing the core folders
    :param by: path of the file whose rows define the resampling intervals
    :param depth_age_file: depth/age file handed to ``process_laser_data``
    :param prefix: leading text shared by the core folders to include
    :param depth: name of the depth column used for resampling
    :param output: when truthy, write the two resampled CSVs, the core
        information CSV (only if it does not exist yet) and two README files
    :return: tuple ``(dfMR, dfLR)`` of the medium- and low-resolution frames
    '''
    medium_endings = ('1', 'MR', '1.txt', 'MR.txt')
    low_endings = ('2', 'LR', '2.txt', 'LR.txt')

    dfMR = DataFrame()
    dfLR = DataFrame()
    df = DataFrame()
    by = DataClass(by)

    for folder in os.listdir(directory):
        if not folder.startswith(prefix):
            continue
        core_dir = os.path.join(directory, folder)
        for input_folder in sorted(os.listdir(core_dir)):
            if not input_folder.startswith('Input'):
                continue
            input_dir = os.path.join(core_dir, input_folder)
            for file in sorted(os.listdir(input_dir)):
                is_input = file.startswith('Input')
                # Medium resolution is tested first, so a name matching both
                # patterns is treated as medium resolution.
                if file.startswith('InputFile_1') or (
                        is_input and file.endswith(medium_endings)):
                    for f in load_input(os.path.join(input_dir, file)):
                        # Run information is collected from the
                        # medium-resolution files only.
                        df = df.append(f.info, ignore_index=True)
                        processed = process_laser_data(f, depth_age_file)
                        dfMR = dfMR.append(
                            resample_laser_by(processed, by.df, depth))
                elif file.startswith('InputFile_2') or (
                        is_input and file.endswith(low_endings)):
                    for f in load_input(os.path.join(input_dir, file)):
                        processed = process_laser_data(f, depth_age_file)
                        dfLR = dfLR.append(
                            resample_laser_by(processed, by.df, depth))

    if not output:
        return dfMR, dfLR

    tag = 'Raw_Resampled_by_{}'.format(by.base)
    to_csv(directory, dfMR, 'LA-ICP-MS_{}_MR.csv'.format(tag))
    to_csv(directory, dfLR, 'LA-ICP-MS_{}_LR.csv'.format(tag))

    output_dir = os.path.join(directory, 'Output_Files')
    info_file = 'full_core_information.csv'
    # An existing core-information file is left untouched.
    if not os.path.isfile(os.path.join(output_dir, info_file)):
        to_csv(directory, df, info_file, False)

    readmeMR = readme_laser_file(
        laser_template, directory, prefix, depth_age_file, FrameClass(dfMR),
        'Medium', str(datetime.date.today()), info_file,
        'LA-ICP-MS_raw_MR.csv', tag)
    readmeLR = readme_laser_file(
        laser_template, directory, prefix, depth_age_file, FrameClass(dfLR),
        'Low', str(datetime.date.today()), info_file,
        'LA-ICP-MS_raw_LR.csv', tag)
    write_readmefile_to_txtfile(
        readmeMR,
        os.path.join(output_dir,
                     '00README_{}_Medium_Resolution.txt'.format(tag)))
    write_readmefile_to_txtfile(
        readmeLR,
        os.path.join(output_dir,
                     '00README_{}_Low_Resolution.txt'.format(tag)))
    return dfMR, dfLR
def resample(by: str, f: str, stat: str = ['mean'], inc_amt: int = 1, by_name: str = None, output=True):
    '''
    Resample the dataset in ``f`` by years or by depths.

    For every selected header one resampled frame is produced with
    ``by_years`` or ``by_depths``. When ``output`` is truthy each frame is
    written to a CSV in the dataset's directory together with a README text
    file listing the files produced so far.

    Example invocation from the command line:
    $ PYTHONPATH=. python climatechange/process_data.py -year_name ../test/csv_files/small.csv

    :param by: 'year'/'y' or 'depth'/'d' (case-insensitive)
    :param f: path of the CSV file to resample
    :param stat: a statistic name or a list of statistic names; a falsy value
        produces files named ``..._stats.csv``
    :param inc_amt: increment passed to ``by_years`` / ``by_depths``
    :param by_name: optional header string; when given it is parsed with
        ``process_header_str`` and replaces the year/depth headers of ``f``
    :param output: when truthy, write the CSV and README files
    :return: list with one resampled frame per header
    :raises ValueError: if ``by`` is not a recognised year/depth selector
    '''
    # Validate the selector up front: an unrecognised value used to fall
    # through both branches and fail later with an UnboundLocalError.
    by_key = str(by).lower()
    if by_key in ('year', 'y'):
        use_years = True
    elif by_key in ('depth', 'd'):
        use_years = False
    else:
        raise ValueError(
            "by must be one of 'year', 'y', 'depth' or 'd' "
            "(case-insensitive); got {!r}".format(by))

    logging.info("Resampling %s by %s", f, by)
    dc = DataClass(f)

    if by_name:
        headers = process_header_str(by_name)
    elif use_years:
        headers = dc.year_headers
    else:
        headers = dc.depth_headers

    if isinstance(stat, list):
        # Work on a copy so the shared default list can never be modified
        # by the helpers it is passed to.
        stat = list(stat)

    all_files = []
    dfs = []
    for h in headers:
        if use_years:
            df = by_years(dc, h, inc_amt, stat)
        else:
            df = by_depths(dc, h, inc_amt, stat)
        dfs.append(df)

        if stat:
            stat_tag = stat if isinstance(stat, str) else '_'.join(stat)
            file = '{}_resample_by_{}_{}_{}.csv'.format(
                dc.base, inc_amt, h.label, stat_tag)
        else:
            file = '{}_resample_by_{}_{}_stats.csv'.format(
                dc.base, inc_amt, h.label)
        all_files.append(file)

        if output:
            to_csv(dc.dirname, df, file)
            # The README receives the cumulative file list, so the README of
            # a later header also mentions the files of earlier headers.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()),
                                        inc_amt, by, stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_{}_{}_resolution.txt'.format(
                        h.label, inc_amt, by)))
    return dfs
def resample_by(filename: str, resample_by: str, stat: List[str] = None, depth: str = None, output=True):
    '''
    Compile descriptive statistics of the samples in ``filename`` over the
    intervals defined by consecutive index values of ``resample_by``.

    :param filename: path of the file whose samples are summarised; loaded
        with ``DataClass``
    :param resample_by: path of the file whose rows provide the interval
        boundaries; loaded with ``DataClass``
    :param stat: name(s) of the ``describe()`` columns to keep; when falsy
        every ``describe()`` column is kept
    :param depth: optional header string parsed with ``process_header_str``;
        when falsy the headers come from ``find_match(dc, dc_by)``
    :param output: when truthy, write one CSV and one README per header
    :return: list with one statistics DataFrame per header, or a list holding
        a single empty DataFrame as soon as a header has no interval rows
        inside the range of ``filename``
    '''
    dc = DataClass(filename)
    dc_by = DataClass(resample_by)
    if depth:
        headers = process_header_str(depth)
    else:
        headers = find_match(dc, dc_by)
    headers_by = []
    # NOTE(review): `resample` is never read in this function.
    resample = []
    all_files = []
    for h in headers:
        # Index both sample tables by the values of the shared header column.
        hr = dc.sample_df.set_index(dc.df[h.name])
        lr = dc_by.sample_df.set_index(dc_by.df[h.name])
        # Keep only the boundary rows that lie inside the range covered by hr.
        lr = lr[(lr.index >= min(hr.index)) & (lr.index <= max(hr.index))]
        stat_dict = []
        for s in dc.sample_headers:
            df = DataFrame()
            if lr.empty:
                # Early exit: results gathered for earlier headers are
                # discarded and a single empty frame is returned.
                return [df]
            # Each pair of consecutive lr index values is a half-open
            # interval [lr[i], lr[i + 1]); describe() of sample `s` over the
            # hr rows in that interval becomes one row of df.
            # NOTE(review): DataFrame.append was removed in pandas 2.0 --
            # confirm the pinned pandas version.
            for i in range(len(lr.index.tolist()) - 1):
                idx = hr[(hr.index >= lr.index[i])
                         & (hr.index < lr.index[i + 1])]
                df = df.append(idx[s.name].describe(), ignore_index=True)
            if stat:
                df = df[stat]
                try:
                    df.columns = [s.label + '_' + col for col in df]
                except TypeError:
                    # Presumably reached when `stat` is a single name and
                    # df[stat] is a Series: iterating it yields values rather
                    # than column names, so the Series is renamed instead.
                    df.name = s.label + '_' + df.name
                stat_dict.append(df)
                if type(stat) == str:
                    file = '{}_resampled_by_{}_{}_{}.csv'.format(
                        dc.base, dc_by.base, h.label, stat)
                else:
                    file = '{}_resampled_by_{}_{}_{}.csv'.format(
                        dc.base, dc_by.base, h.label, '_'.join(stat))
            else:
                df.columns = [s.label + '_' + col for col in df]
                stat_dict.append(df)
                file = '{}_resample_by_{}_{}.csv'.format(
                    dc.base, dc_by.base, h.label)
        all_files.append(file)
        # One column block per sample, side by side.
        stat_df = pandas.concat(stat_dict, axis=1)
        # Rows are labelled with the lower bound of their interval; the last
        # lr value only closes the final interval.
        stat_df = stat_df.set_index([lr.index[:-1]])
        stat_df.index.name = h.label
        if output:
            to_csv(dc.dirname, stat_df, file)
            # The README is given the cumulative list of files produced so
            # far; the label argument is always the literal 'depth'.
            readme = readme_output_file(resample_template, dc,
                                        str(datetime.date.today()),
                                        dc_by.base, 'depth', stat, all_files)
            write_readmefile_to_txtfile(
                readme,
                os.path.join(
                    dc.dirname,
                    '00README_resample_{}_by_{}.txt'.format(
                        h.label, dc_by.base)))
        headers_by.append(stat_df)
    return headers_by