async def plot_dailies(logger):
    """
    Loads dailies for the last 3 weeks and plots with major ticks every three days and minor ticks every day.
    Plots are registered with the core database so they're uploaded to the Taylor drive.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        import datetime as dt
        from summit_core import connect_to_db, core_dir, TempDir, Config, Plot, add_or_ignore_plot, create_daily_ticks

        plotdir = core_dir / 'plots/daily'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/protected/plots'

        try:
            os.chdir(plotdir)
        except FileNotFoundError:
            os.mkdir(plotdir)

    except ImportError as e:
        logger.error('ImportError occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite', core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the database in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        daily_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not daily_config:
            daily_config = Config(processor=PROC, days_to_plot=21)  # use all default values except processor on init
            core_session.add(daily_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_dailies()')

        date_ago = dt.datetime.now() - dt.timedelta(
            days=daily_config.days_to_plot + 1)  # set a static cutoff for retrieving data at beginning of plot cycle

        date_limits, major_ticks, minor_ticks = create_daily_ticks(daily_config.days_to_plot, minors_per_day=1)

        major_ticks = [t for ind, t in enumerate(major_ticks) if ind % 3 == 0]  # use every third daily tick

        dailies = session.query(Daily).filter(Daily.date >= date_ago).order_by(Daily.date).all()

        dailydict = {}
        for param in daily_parameters:
            dailydict[param] = [getattr(d, param) for d in dailies]

        with TempDir(plotdir):
            # plot hot-side temperatures (adsorbent transfer lines, valves, GC transfer, catalyst)
            name = summit_daily_plot(dailydict.get('date'),
                                     {'Ads Xfer A': [None, dailydict.get('ads_xfer_a')],
                                      'Ads Xfer B': [None, dailydict.get('ads_xfer_b')],
                                      'Valves Temp': [None, dailydict.get('valves_temp')],
                                      'GC Xfer Temp': [None, dailydict.get('gc_xfer_temp')],
                                      'Catalyst': [None, dailydict.get('catalyst')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0, 'top': 475},
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            hot_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(hot_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'),
                                     {'CJ1 Temp': [None, dailydict.get('cj1')],
                                      'CJ2 Temp': [None, dailydict.get('cj2')],
                                      'Standard Temp': [None, dailydict.get('std_temp')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 10, 'top': 50},
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            room_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(room_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'),
                                     {'H2 Gen Pressure': [None, dailydict.get('h2_gen_p')],
                                      'Line Pressure': [None, dailydict.get('line_p')],
                                      'Zero Pressure': [None, dailydict.get('zero_p')],
                                      'FID Pressure': [None, dailydict.get('fid_p')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0, 'top': 75},
                                     y_label_str='Pressure (PSI)',
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            pressure_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(pressure_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'),
                                     {'Inlet Short Temp': [None, dailydict.get('inlet_short')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0, 'top': 60},
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            inlet_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(inlet_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'),
                                     {'Battery V': [None, dailydict.get('battv')],
                                      '12Va': [None, dailydict.get('v12a')],
                                      '15Va': [None, dailydict.get('v15a')],
                                      '15Vb': [None, dailydict.get('v15b')],
                                      '24V': [None, dailydict.get('v24')],
                                      '5Va': [None, dailydict.get('v5a')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': 0, 'top': 30},
                                     y_label_str='Voltage (V)',
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            voltage_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(voltage_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'),
                                     {'MFC1': [None, dailydict.get('mfc1')],
                                      'MFC2': [None, dailydict.get('mfc2')],
                                      'MFC3a': [None, dailydict.get('mfc3a')],
                                      'MFC3b': [None, dailydict.get('mfc3b')],
                                      'MFC4': [None, dailydict.get('mfc4')],
                                      'MFC5': [None, dailydict.get('mfc5')]},
                                     limits={'right': date_limits.get('right', None),
                                             'left': date_limits.get('left', None),
                                             'bottom': -1, 'top': 3.5},
                                     y_label_str='Flow (mL/min)',
                                     major_ticks=major_ticks, minor_ticks=minor_ticks)

            flow_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(flow_plot, core_session)

        core_session.commit()

        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
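# plot_dailies() is a coroutine, so it must be awaited from an event loop rather than called directly.
# The sketch below is a minimal, hypothetical driver, not the project's actual scheduler; the logger
# setup and the idea of running it a single time on demand are assumptions.
def _example_run_plot_dailies_once():
    """Illustrative only: run plot_dailies() a single time from a synchronous context."""
    import asyncio
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # asyncio.run() creates an event loop, awaits the coroutine, and returns its True/False result
    return asyncio.run(plot_dailies(logger))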
async def update_excel_sheet(logger):
    """
    Checks for new GcRuns since it was last run and creates a DataFrame containing run information that's
    appended to a spreadsheet on the Z-drive. This sheet is filled out by whoever does the manual integration,
    and is later read by TODO (I haven't written that yet) to bring the updated peak areas back into the
    database and re-calculate mixing ratios.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running update_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime
        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning
        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error('Filepath for the methane integration sheet could not be retrieved.')
            send_processor_warning(PROC, 'Filepath Error',
                                   '''The methane integration sheet filepath could not be retrieved. It should be
                                   listed as "methane_sheet" in file_locations.json in the core folder.''')
            return False

    except ImportError as e:
        logger.error('ImportError occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_config = core_session.query(Config).filter(Config.processor == 'methane_sheet').one_or_none()

        if not methane_sheet_config:
            methane_sheet_config = Config(processor='methane_sheet')  # use all default values except processor on init
            core_session.add(methane_sheet_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        most_recent_gcrun = session.query(GcRun.date).order_by(GcRun.date.desc()).first()

        if not most_recent_gcrun:
            most_recent_gcrun = datetime(1900, 1, 1)  # default to a safely historic date
        else:
            most_recent_gcrun = most_recent_gcrun.date  # get date from tuple response

        # object list of all the runs past the most recent date
        new_runs = session.query(GcRun).filter(GcRun.date > methane_sheet_config.last_data_date).all()

        if new_runs:
            col_list = ['date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2', 'run_median', 'run_rsd',
                        'std_median', 'std_rsd']  # list of all columns needed in the dataframe

            master_df = pd.DataFrame(index=None, columns=col_list)  # frame an empty df for new run data

            for run in new_runs:
                df = pd.DataFrame(index=range(1, 6), columns=col_list)  # create a five-row block to add later

                df['date'][1] = run.date
                df['filename'][1] = run.logfile.name  # add date and filename for this block

                # The below can copy peak information from the automatic integrations into the spreadsheet
                # peaks1 = [sample.peak for sample in run.samples if sample.sample_num in [0, 2, 4, 6, 8]]
                # peaks2 = [sample.peak for sample in run.samples if sample.sample_num in [1, 3, 5, 7, 9]]
                # df.loc[0:5, 'peak1'] = [(peak.pa if peak else None) for peak in peaks1]
                # df.loc[0:5, 'peak2'] = [(peak.pa if peak else None) for peak in peaks2]

                master_df = master_df.append(df)  # append block to all new ones so far

            # TODO: Anything touching sheets needs to be carefully made to catch inaccessible files
            append_df_to_excel(methane_sheet, master_df, **{'index': False})  # add all new lines and save sheet
            add_formulas_and_format_sheet(methane_sheet)  # open sheet and add formulas where non-existent, format cols

            logger.info('New GcRuns added to the automated integration spreadsheet.')

            methane_sheet_config.last_data_date = most_recent_gcrun
        else:
            logger.info('No new GcRuns found to add to the automated integration spreadsheet.')

        core_session.merge(methane_sheet_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()

        logger.error(f'Exception {e.args} occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
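# For reference, a standalone sketch of the five-row block update_excel_sheet() builds for each GcRun.
# Illustrative only: the column list mirrors col_list above, the example date and filename are made up,
# and rows 2-5 are left blank for the samples that get integrated by hand.
def _example_run_block():
    """Illustrative only: build one empty five-row spreadsheet block for a single run."""
    import datetime as dt
    import pandas as pd

    col_list = ['date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2',
                'run_median', 'run_rsd', 'std_median', 'std_rsd']

    block = pd.DataFrame(index=range(1, 6), columns=col_list)
    block.loc[1, 'date'] = dt.datetime(2019, 6, 1, 12, 0)  # hypothetical run date
    block.loc[1, 'filename'] = '2019_06_01_run.txt'        # hypothetical log filename
    return block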
async def plot_new_data(logger):
    """
    Checks data against the last plotting time, and creates new plots for CO, CO2, and CH4 if new data exists.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running plot_new_data()')

    try:
        from pathlib import Path
        from summit_core import picarro_dir as rundir
        from summit_core import create_daily_ticks, connect_to_db, TempDir, Plot, core_dir, Config, add_or_ignore_plot
        from summit_picarro import Base, Datum, summit_picarro_plot

        plotdir = rundir / 'plots'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except ImportError as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        picarro_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not picarro_config:
            picarro_config = Config(processor=PROC)  # use all default values except processor on init
            core_session.add(picarro_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        newest_data_point = (session.query(Datum.date)
                             .filter(Datum.mpv_position == 1)
                             .order_by(Datum.date.desc())
                             .first()[0])

        if newest_data_point <= picarro_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        picarro_config.last_data_date = newest_data_point
        core_session.add(picarro_config)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(picarro_config.days_to_plot)

        # get only ambient data that falls in the plotting period
        all_data = (session.query(Datum.date, Datum.co, Datum.co2, Datum.ch4)
                    .filter((Datum.mpv_position == 0) | (Datum.mpv_position == 1))
                    .filter((Datum.instrument_status == 963), (Datum.alarm_status == 0))
                    .filter(Datum.date >= date_limits['left'])
                    .all())

        if not all_data:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        dates = []
        co = []
        co2 = []
        ch4 = []
        for result in all_data:
            dates.append(result.date)
            co.append(result.co)
            co2.append(result.co2)
            ch4.append(result.ch4)

        with TempDir(plotdir):
            from summit_core import five_minute_medians

            dates_co, co = five_minute_medians(dates, co)

            name = summit_picarro_plot(None, {'Summit CO': [dates_co, co]},
                                       limits={'right': date_limits.get('right', None),
                                               'left': date_limits.get('left', None),
                                               'bottom': 60, 'top': 180},
                                       major_ticks=major_ticks, minor_ticks=minor_ticks)

            co_plot = Plot(plotdir / name, remotedir, True)  # stage plots to be uploaded
            add_or_ignore_plot(co_plot, core_session)

            name = summit_picarro_plot(None, {'Summit CO2': [dates, co2]},
                                       limits={'right': date_limits.get('right', None),
                                               'left': date_limits.get('left', None),
                                               'bottom': 400, 'top': 420},
                                       major_ticks=major_ticks, minor_ticks=minor_ticks,
                                       unit_string='ppmv')

            co2_plot = Plot(plotdir / name, remotedir, True)  # stage plots to be uploaded
            add_or_ignore_plot(co2_plot, core_session)

            name = summit_picarro_plot(None, {'Summit Methane [Picarro]': [dates, ch4]},
                                       limits={'right': date_limits.get('right', None),
                                               'left': date_limits.get('left', None),
                                               'bottom': 1850, 'top': 2050},
                                       major_ticks=major_ticks, minor_ticks=minor_ticks)

            ch4_plot = Plot(plotdir / name, remotedir, True)  # stage plots to be uploaded
            add_or_ignore_plot(ch4_plot, core_session)

        logger.info('New data plots were created.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return False
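# plot_new_data() smooths the CO series with five_minute_medians() from summit_core before plotting.
# The sketch below is only an assumption about what such a helper could look like (bin timestamps into
# five-minute windows and take the median of each), shown to document the technique; it is not the
# summit_core implementation.
def _five_minute_medians_sketch(dates, values):
    """Illustrative only: return per-bin timestamps and median values for five-minute bins."""
    import pandas as pd

    series = pd.Series(values, index=pd.DatetimeIndex(dates))
    binned = series.resample('5min').median().dropna()  # one median per five-minute window with data
    return list(binned.index), list(binned.values)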
async def dual_plot_methane(logger):
    """
    Connects to both the methane [GC] and Picarro databases to create an overlaid plot of both datasets.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    PROC = 'Methane DualPlotter'

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir
        from summit_core import picarro_dir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_picarro import Datum
        from summit_methane import Base, GcRun, summit_methane_plot
        from summit_picarro import Base as PicarroBase

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except ImportError as e:
        logger.error('ImportError occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        gc_engine, gc_session = connect_to_db('sqlite:///summit_methane.sqlite', methane_dir)
        Base.metadata.create_all(gc_engine)

        picarro_engine, picarro_session = connect_to_db('sqlite:///summit_picarro.sqlite', picarro_dir)
        PicarroBase.metadata.create_all(picarro_engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        twoplot_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not twoplot_config:
            twoplot_config = Config(processor=PROC)  # use all default values except processor on init
            core_session.add(twoplot_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running dual_plot_methane()')

        newest_picarro_data_point = (picarro_session.query(Datum.date)
                                     .filter(Datum.mpv_position == 1)
                                     .order_by(Datum.date.desc())
                                     .first()[0])

        try:
            newest_gc_data_point = (gc_session.query(GcRun.date)
                                    .filter(GcRun.median != None)
                                    .filter(GcRun.standard_rsd < .02)
                                    .filter(GcRun.rsd < .02)
                                    .order_by(GcRun.date.desc())
                                    .first()[0])
        except TypeError:
            logger.error('NoneType not subscriptable encountered due to lack of methane data to query.')
            from summit_errors import send_processor_warning

            send_processor_warning(PROC, 'Dual Plotter',
                                   '''The Methane Dual Plotter could not query any GcRuns for methane data.\n
                                   Check the database to make sure there are in fact GcRuns with medians and valid rsds.
                                   \nThis often happens when the methane database is remade without re-setting the
                                   filesize and pa_startline in the config table of the core database, thus no peaks
                                   are found.''')
            return False

        newest_data_point = max(newest_picarro_data_point, newest_gc_data_point)

        if newest_data_point <= twoplot_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            return False

        date_limits, major_ticks, minor_ticks = create_daily_ticks(twoplot_config.days_to_plot)

        if newest_data_point > twoplot_config.last_data_date:

            runs_with_medians = (gc_session.query(GcRun)
                                 .filter(GcRun.median != None)
                                 .filter(GcRun.standard_rsd < .02)
                                 .filter(GcRun.rsd < .02)
                                 .order_by(GcRun.date)
                                 .all())

            gc_dates = [run.date for run in runs_with_medians]
            gc_ch4 = [run.median for run in runs_with_medians]

            # grab only data that falls in the plotting period
            picarro_data = (picarro_session.query(Datum.date, Datum.ch4)
                            .filter((Datum.mpv_position == 0) | (Datum.mpv_position == 1))
                            .filter((Datum.instrument_status == 963), (Datum.alarm_status == 0))
                            .filter(Datum.date >= date_limits['left'])
                            .all())

            picarro_dates = [p.date for p in picarro_data]
            picarro_ch4 = [p.ch4 for p in picarro_data]

            with TempDir(methane_dir / 'plots'):
                name = summit_methane_plot(None,
                                           {'Summit Methane [Picarro]': [picarro_dates, picarro_ch4],
                                            'Summit Methane [GC]': [gc_dates, gc_ch4]},
                                           title='Summit Methane [Picarro & GC]',
                                           limits={'bottom': 1850, 'top': 2050,
                                                   'right': date_limits.get('right', None),
                                                   'left': date_limits.get('left', None)},
                                           major_ticks=major_ticks, minor_ticks=minor_ticks)

                methane_plot = Plot(methane_dir / 'plots' / name, remotedir, True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                twoplot_config.last_data_date = newest_data_point
                core_session.merge(twoplot_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        gc_session.close()
        gc_engine.dispose()
        picarro_session.close()
        picarro_engine.dispose()
        return False
async def plot_new_data(logger):
    """
    If newer data exists, plot it going back one week from the day of the plotting.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_methane import Sample, GcRun, Base, plottable_sample, summit_methane_plot

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'
    except ImportError as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(processor=PROC)  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_new_data()')

        runs_with_medians = (session.query(GcRun)
                             .filter(GcRun.median != None)
                             .filter(GcRun.standard_rsd < .02)
                             .filter(GcRun.rsd < .02)
                             .order_by(GcRun.date)
                             .all())

        # get date after filtering, i.e. don't plot if there's no new data getting plotted
        last_ambient_date = runs_with_medians[-1].date

        date_limits, major_ticks, minor_ticks = create_daily_ticks(ch4_config.days_to_plot)

        if last_ambient_date > ch4_config.last_data_date:

            ambient_dates = [run.date for run in runs_with_medians]
            ambient_mrs = [run.median for run in runs_with_medians]

            with TempDir(rundir / 'plots'):
                name = summit_methane_plot(None,
                                           {'Summit Methane [GC]': [ambient_dates, ambient_mrs]},
                                           limits={'bottom': 1850, 'top': 2050,
                                                   'right': date_limits.get('right', None),
                                                   'left': date_limits.get('left', None)},
                                           major_ticks=major_ticks, minor_ticks=minor_ticks)

                methane_plot = Plot(rundir / 'plots' / name, remotedir, True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                ch4_config.last_data_date = last_ambient_date
                core_session.merge(ch4_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return False
async def check_load_pa_log(logger):
    """
    Read the PA log and create new PaLine objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    logger.info('Running check_load_pa_log()')

    try:
        from summit_core import methane_LOG_path as pa_filepath
        from summit_core import connect_to_db, check_filesize, core_dir, Config, split_into_sets_of_n
        from summit_methane import Base, read_pa_line, PaLine
        from summit_core import methane_dir as rundir
        from pathlib import Path
    except ImportError as e:
        logger.error('ImportError occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(processor=PROC)  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        if check_filesize(pa_filepath) <= ch4_config.filesize:
            logger.info('PA file did not change size.')
            return False

        ch4_config.filesize = check_filesize(pa_filepath)
        core_session.merge(ch4_config)
        core_session.commit()

        line_to_start = ch4_config.pa_startline - 3  # pad start to avoid missing samples
        if line_to_start < 0:
            line_to_start = 0

        pa_file_contents = pa_filepath.read_text().split('\n')[line_to_start:]

        ch4_config.pa_startline = ch4_config.pa_startline + len(pa_file_contents) - 1

        pa_file_contents[:] = [line for line in pa_file_contents if line]

        pa_lines = []
        for line in pa_file_contents:
            pa_lines.append(read_pa_line(line))

        if not pa_lines:
            logger.info('No new PaLines found.')
            return False
        else:
            ct = 0  # count committed logs
            all_line_dates = [line.date for line in pa_lines]

            # SQLite can't take IN clauses with > 1000 variables, so chunk to sets of 500
            if len(all_line_dates) > 500:
                sets = split_into_sets_of_n(all_line_dates, 500)
            else:
                sets = [all_line_dates]
                # TODO: Can be reduced to just splitting; this step is done automatically by split_into_sets_of_n.

            dates_already_in_db = []
            for date_set in sets:
                set_matches = session.query(PaLine.date).filter(PaLine.date.in_(date_set)).all()
                set_matches[:] = [s.date for s in set_matches]
                dates_already_in_db.extend(set_matches)

            for line in pa_lines:
                if line.date not in dates_already_in_db:
                    session.add(line)
                    logger.info(f'PaLine for {line.date} added.')
                    ct += 1

        if ct == 0:
            logger.info('No new PaLines found.')
        else:
            logger.info(f'{ct} PaLines added.')

        session.commit()

        core_session.merge(ch4_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()

        logger.error(f'Exception {e.args} occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False
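# check_load_pa_log() chunks its date list because SQLite rejects queries with roughly 1000 or more
# bound variables in a single IN clause. The helper it uses lives in summit_core; the version below is
# only a guess at its behavior (consecutive slices of at most n items), shown to document the technique.
def _split_into_sets_of_n_sketch(items, n):
    """Illustrative only: yield consecutive chunks of at most n items."""
    for start in range(0, len(items), n):
        yield items[start:start + n]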
async def read_excel_sheet(logger):
    """
    Placeholder for reading the manual-integration spreadsheet back into the database.
    Currently it only ensures its Config entry exists; the read-back of updated peak areas is not yet implemented.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running read_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime
        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning
        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error('Filepath for the methane integration sheet could not be retrieved.')
            send_processor_warning(PROC, 'Filepath Error',
                                   '''The methane integration sheet filepath could not be retrieved. It should be
                                   listed as "methane_sheet" in file_locations.json in the core folder.''')
            return False

    except ImportError as e:
        logger.error('ImportError occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite', rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} prevented connection to the database in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db('sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_read_config = (core_session.query(Config)
                                     .filter(Config.processor == 'methane_sheet_read')
                                     .one_or_none())

        if not methane_sheet_read_config:
            # use all default values except processor on init
            methane_sheet_read_config = Config(processor='methane_sheet_read')
            core_session.add(methane_sheet_read_config)
            core_session.commit()

    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the core database in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        # TODO: read the integrated sheet and bring updated peak areas back into the database

        core_session.merge(methane_sheet_read_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()

        logger.error(f'Exception {e.args} occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
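# A minimal sketch of what the eventual sheet read-back could look like. It is not the project's
# implementation: the column names ('date', 'peak1', 'peak2') are assumed from col_list in
# update_excel_sheet() above, and the grouping of the sheet into five-row blocks per run is an assumption
# based on how that function writes the sheet.
def _read_manual_integrations_sketch(methane_sheet):
    """Illustrative only: return {run date: list of manually entered [peak1, peak2] areas}."""
    import pandas as pd

    sheet = pd.read_excel(methane_sheet)
    sheet['date'] = sheet['date'].ffill()  # date is only written on the first row of each five-row block

    peaks_by_run = {}
    for run_date, block in sheet.groupby('date'):
        # skip blocks that were never filled out by hand
        if block[['peak1', 'peak2']].notna().any().any():
            peaks_by_run[run_date] = block[['peak1', 'peak2']].values.tolist()
    return peaks_by_run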
from summit_core import voc_dir, core_dir
from summit_core import connect_to_db, Config
from summit_voc import LogFile
import pandas as pd

engine, session = connect_to_db('sqlite:///Jsummit_voc.sqlite', voc_dir)
core_engine, core_session = connect_to_db('sqlite:///Jsummit_core.sqlite', core_dir)

Config.__table__.create(core_engine, checkfirst=True)

logcheck_config = core_session.query(Config).filter(Config.processor == 'Log Checking').one_or_none()

if not logcheck_config:
    logcheck_config = Config(processor='Log Checking',
                             days_to_plot=21)  # use all default values except processor on init
    core_session.add(logcheck_config)
    core_session.commit()

# Query the VOC database for the most recent logfile data
recentDate = (session                          # open session
              .query(LogFile.date)             # gather date
              .order_by(LogFile.date.desc())   # order by descending date
              .first()[0])                     # grab just the first value

failed = []  # failed var for later

# If the most recent date is greater than the last one, we query for all logs greater than it, save the date of the
# last one, and then apply various actions to them
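# A hedged sketch of the step the comment above describes, not the actual continuation of this script:
# it assumes logcheck_config.last_data_date is the bookmark attribute used elsewhere in this codebase and
# that the "various actions" amount to iterating over the new LogFile rows.
if recentDate > logcheck_config.last_data_date:
    new_logs = (session.query(LogFile)
                .filter(LogFile.date > logcheck_config.last_data_date)
                .order_by(LogFile.date)
                .all())  # all logs newer than the last one handled

    if new_logs:
        logcheck_config.last_data_date = new_logs[-1].date  # save the date of the last new log
        core_session.merge(logcheck_config)
        core_session.commit()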