Example #1
async def match_runs_to_lines(logger):
    """
    Match unmatched PaLines to GcRuns in the database and commit any new pairings.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import GcRun, PaLine, match_lines_to_runs, Base
    except ImportError as e:
        send_processor_email(PROC, exception=e)
        logger.error('ImportError occurred in match_runs_to_lines()')
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_runs_to_lines()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_runs_to_lines()')

        unmatched_lines = session.query(PaLine).filter(
            PaLine.run == None).all()
        unmatched_runs = session.query(GcRun).filter(
            GcRun.pa_line_id == None).all()

        # married_runs_count = session.query(GcRun).filter(GcRun.status == 'married').count()

        lines, runs, count = match_lines_to_runs(unmatched_lines,
                                                 unmatched_runs)

        session.commit()

        if count:
            logger.info(f'{count} GcRuns matched with PaLines.')
            return True
        else:
            logger.info('No new GcRun-PaLine pairs matched.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_runs_to_lines()')
        send_processor_email(PROC, exception=e)
        return False
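These processor coroutines are all driven the same way: build a logger and await them from an event loop. A minimal driver sketch, assuming match_runs_to_lines() (defined above) and a module-level PROC/send_processor_email are available in the running module:

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('summit.methane')

# one pass of the processor; it returns True only when new GcRun-PaLine matches were committed
asyncio.run(match_runs_to_lines(logger))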
Example #2
async def add_one_standard(logger):
    """
    Add a single standard (the current working one), so that quantifications are possible. VERY TEMPORARY.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db
        from summit_methane import Standard, Base
    except ImportError as e:
        logger.error('ImportError occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in add_one_standard()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        current_standard_dates = [
            S.date_st for S in session.query(Standard).all()
        ]

        my_only_standard = Standard('ws_2019', 2067.16, datetime(2019, 1, 1),
                                    datetime(2019, 12, 31))

        if my_only_standard.date_st not in current_standard_dates:
            session.merge(my_only_standard)
            session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in add_one_standard()')
        send_processor_email(PROC, exception=e)
        return False
Example #3
def get_last_processor_date(processor, logger):
    """
    Retrieves the latest high-level date for the specified processor. It looks at GcRuns for VOCs (complete runs),
    5-second Datums for the Picarro, and matched GcRuns for methane.
    :param processor: str, in ['voc', 'picarro', 'methane']
    :param logger: logging logger
    :return: datetime, date of last data point for the specified processor
    """

    from summit_core import connect_to_db, TempDir

    if processor == 'voc':
        from summit_core import voc_dir as directory
        from summit_voc import GcRun as DataType
    elif processor == 'picarro':
        from summit_core import picarro_dir as directory
        from summit_picarro import Datum as DataType
    elif processor == 'methane':
        from summit_core import methane_dir as directory
        from summit_methane import GcRun as DataType
    else:
        logger.error('Invalid processor supplied to get_last_processor_date()')
        raise ValueError('Invalid processor supplied to get_last_processor_date()')

    with TempDir(directory):
        engine, session = connect_to_db(f'sqlite:///summit_{processor}.sqlite',
                                        directory)
        val = session.query(DataType.date).order_by(
            DataType.date.desc()).first()

        if val:
            val = val[0]

    session.close()
    engine.dispose()

    return val
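A hedged usage sketch for get_last_processor_date(): call it for each processor and compare against the current time to flag one that has stalled. The six-hour threshold and the logger name are illustrative assumptions, not taken from the project.

import logging
from datetime import datetime, timedelta

logger = logging.getLogger('summit.status')

for proc in ['voc', 'picarro', 'methane']:
    last_date = get_last_processor_date(proc, logger)
    if last_date and datetime.now() - last_date > timedelta(hours=6):
        logger.warning(f'No new {proc} data since {last_date}.')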
Example #4
    ax.set_ylabel(f'Mixing Ratio ({unit_string})', fontsize=20)
    ax.set_title(f'{comp_list}', fontsize=24, y=1.02)
    ax.legend()

    f1.subplots_adjust(bottom=.20)

    plot_name = f'{fn_list}_last_week.png'
    f1.savefig(plot_name, dpi=150)
    plt.close(f1)

    return plot_name  # wanted to return the figure to add to it with methane


# Connect to the Picarro DB
rundir = r'C:\Users\ARL\Desktop\Testing DB'  # location of DB
engine, session = connect_to_db('sqlite:///Jsummit_picarro.sqlite',
                                rundir)  # Create eng & sess
Base.metadata.create_all(engine)  # Create base

date_limits, major_ticks, minor_ticks = custom_create_daily_ticks(6)
all_data = (
    session.query(Datum.date, Datum.ch4)  # get date and methane
    .filter((Datum.mpv_position == 0.0) |
            (Datum.mpv_position == 1.0))  # filter for not cal events
    .filter((Datum.instrument_status == 963),
            (Datum.alarm_status == 0))  # filter out bad data
    .filter(Datum.date >= date_limits['left'])  # just get certain dates
    .all())

# Gather the Picarro Methane Data
picarro_dates = []
picarro_ch4 = []
import pandas as pd
from datetime import datetime

from summit_methane import GcRun, add_formulas_and_format_sheet
from summit_core import connect_to_db, append_df_to_excel
from summit_core import methane_dir, data_file_paths

methane_sheet = data_file_paths.get('methane_sheet', None)

if not methane_sheet:
    pass
    # TODO: ERROR!

engine, session = connect_to_db('sqlite:///summit_methane.sqlite', methane_dir)

runs_for_this_year = session.query(GcRun).filter(
    GcRun.date.between(datetime(2019, 1, 1), datetime.now())).all()

col_list = [
    'date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2', 'run_median',
    'run_rsd', 'std_median', 'std_rsd'
]

master_df = pd.DataFrame(index=None, columns=col_list)

for run in runs_for_this_year:
    df = pd.DataFrame(index=range(1, 6), columns=col_list)
    df['date'][1] = run.date
    df['filename'][1] = run.logfile.name

    # The below can be turned on to copy peak information from the automatic integrations into the spreadsheet
Example #6
async def update_excel_sheet(logger):
    """
    This checks for new GcRuns since it was last run and creates a DataFrame of run information that is appended
    to a spreadsheet on the Z-drive. That sheet is filled out by whoever does the manual integration, and is later read
    by TODO - I haven't written that yet
    to bring the updated peak areas back into the database and re-calculate mixing ratios.

    :param logger: logging logger for info and failures
    :return: bool, True if ran, False if errored
    """
    logger.info('Running update_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning

        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved. It should be listed
                                   as "methane_sheet" in file_locations.json in the core folder.'''
            )
            return False

    except ImportError as e:
        logger.error('ImportError occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_config = core_session.query(Config).filter(
            Config.processor == 'methane_sheet').one_or_none()

        if not methane_sheet_config:
            methane_sheet_config = Config(processor='methane_sheet')
            # use all default values except processor on init
            core_session.add(methane_sheet_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in update_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        most_recent_gcrun = session.query(GcRun.date).order_by(
            GcRun.date.desc()).first()

        if not most_recent_gcrun:
            most_recent_gcrun = datetime(
                1900, 1, 1)  # default to a safely historic date
        else:
            most_recent_gcrun = most_recent_gcrun.date  # get date from tuple response

        # all runs newer than the last date already written to the sheet
        new_runs = session.query(GcRun).filter(
            GcRun.date > methane_sheet_config.last_data_date).all()

        if new_runs:
            col_list = [
                'date', 'filename', 'peak1', 'peak2', 'mr1', 'mr2',
                'run_median', 'run_rsd', 'std_median', 'std_rsd'
            ]  # list of all columns needed in the dataframe

            master_df = pd.DataFrame(
                index=None,
                columns=col_list)  # frame an empty df for new run data

            for run in new_runs:
                df = pd.DataFrame(
                    index=range(1, 6),
                    columns=col_list)  # create a five-row block to add later
                df['date'][1] = run.date
                df['filename'][
                    1] = run.logfile.name  # add date and filename for this block

                # The below can copy peak information from the automatic integrations into the spreadsheet
                # peaks1 = [sample.peak for sample in run.samples if sample.sample_num in [0,2,4,6,8]]
                # peaks2 = [sample.peak for sample in run.samples if sample.sample_num in [1,3,5,7,9]]
                # df.loc[0:5, 'peak1'] = [(peak.pa if peak else None) for peak in peaks1]
                # df.loc[0:5, 'peak2'] = [(peak.pa if peak else None) for peak in peaks2]

                master_df = master_df.append(
                    df)  # append block to all new ones so far

            # TODO: Anything touching sheets needs to be written carefully to catch inaccessible files
            append_df_to_excel(methane_sheet, master_df,
                               index=False)  # add all new lines and save sheet
            add_formulas_and_format_sheet(
                methane_sheet
            )  # open sheet and add formulas where non-existent, format cols

            logger.info(
                'New GcRuns added to the automated integration spreadsheet.')

            methane_sheet_config.last_data_date = most_recent_gcrun
        else:
            logger.info(
                'No new GcRuns found to add to the automated integration spreadsheet.'
            )

        core_session.merge(methane_sheet_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in update_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
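append_df_to_excel() is imported from summit_core and its implementation is not shown in these examples. A rough sketch of that kind of helper, assuming pandas >= 1.4 with the openpyxl engine and that the target sheet already exists when the file does, might look like this; it is an illustration, not the project's actual code.

from pathlib import Path

import pandas as pd


def append_df_to_excel_sketch(filename, df, sheet_name='Sheet1', **to_excel_kwargs):
    """Append df below the existing rows of sheet_name, creating the file if needed."""
    if not Path(filename).exists():
        df.to_excel(filename, sheet_name=sheet_name, **to_excel_kwargs)
        return

    with pd.ExcelWriter(filename, mode='a', engine='openpyxl',
                        if_sheet_exists='overlay') as writer:
        startrow = writer.book[sheet_name].max_row  # first empty row of the existing sheet
        df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
                    header=False, **to_excel_kwargs)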
Example #7
async def dual_plot_methane(logger):
    """
    Connects to both the methane [GC] and Picarro databases to create an overlaid plot of both datasets.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    PROC = 'Methane DualPlotter'

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir
        from summit_core import picarro_dir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_picarro import Datum
        from summit_methane import Base, GcRun, summit_methane_plot

        from summit_picarro import Base as PicarroBase

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except ImportError as e:
        logger.error('ImportError occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        gc_engine, gc_session = connect_to_db(
            'sqlite:///summit_methane.sqlite', methane_dir)
        Base.metadata.create_all(gc_engine)

        picarro_engine, picarro_session = connect_to_db(
            'sqlite:///summit_picarro.sqlite', picarro_dir)
        PicarroBase.metadata.create_all(picarro_engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        twoplot_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not twoplot_config:
            twoplot_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(twoplot_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in dual_plot_methane()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running dual_plot_methane()')

        newest_picarro_data_point = (picarro_session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])
        try:
            newest_gc_data_point = (gc_session.query(GcRun.date).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date.desc()).first()[0])
        except TypeError:
            logger.error(
                'NoneType not subscriptable encountered due to lack of methane data to query.'
            )
            from summit_errors import send_processor_warning
            send_processor_warning(
                PROC, 'Dual Plotter',
                '''The Methane Dual Plotter could not query any GcRuns for methane data.\n
                                   Check the database to make sure there are in fact GcRuns with medians and valid rsds.
                                   \nThis often happens when the methane database is remade without re-setting
                                   the filesize and pa_startline in the config table of the core database,
                                   thus no peaks are found.''')
            return False

        newest_data_point = max(newest_picarro_data_point,
                                newest_gc_data_point)

        if newest_data_point <= twoplot_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            picarro_session.close()
            picarro_engine.dispose()
            return False

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            twoplot_config.days_to_plot)

        if newest_data_point > twoplot_config.last_data_date:

            runs_with_medians = (gc_session.query(GcRun).filter(
                GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                    GcRun.rsd < .02).order_by(GcRun.date).all())

            gc_dates = [run.date for run in runs_with_medians]
            gc_ch4 = [run.median for run in runs_with_medians]

            picarro_data = (picarro_session.query(
                Datum.date, Datum.ch4).filter((Datum.mpv_position == 0) | (
                    Datum.mpv_position == 1)).filter(
                        (Datum.instrument_status == 963),
                        (Datum.alarm_status == 0)).filter(
                            Datum.date >= date_limits['left']).all()
                            )  # grab only data that falls in plotting period

            picarro_dates = [p.date for p in picarro_data]
            picarro_ch4 = [p.ch4 for p in picarro_data]

            with TempDir(methane_dir / 'plots'):
                name = summit_methane_plot(
                    None, {
                        'Summit Methane [Picarro]':
                        [picarro_dates, picarro_ch4],
                        'Summit Methane [GC]': [gc_dates, gc_ch4]
                    },
                    title='Summit Methane [Picarro & GC]',
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(methane_dir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                twoplot_config.last_data_date = newest_data_point
                core_session.merge(twoplot_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        gc_session.close()
        gc_engine.dispose()

        picarro_session.close()
        picarro_engine.dispose()

        core_session.commit()

        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in dual_plot_methane()')
        send_processor_email(PROC, exception=e)

        core_session.close()
        core_engine.dispose()

        gc_session.close()
        gc_engine.dispose()

        picarro_session.close()
        picarro_engine.dispose()
        return False
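create_daily_ticks() (and the custom_create_daily_ticks() used in Example #4) come from summit_core and are not shown here. A plausible sketch, under the assumption that the plotting window ends at the upcoming midnight and uses daily major ticks with quarter-day minor ticks:

from datetime import datetime, timedelta


def create_daily_ticks_sketch(days_to_plot, minors_per_day=4):
    """Return (date_limits, major_ticks, minor_ticks) covering the last days_to_plot days."""
    end = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    start = end - timedelta(days=days_to_plot)

    date_limits = {'left': start, 'right': end}
    major_ticks = [start + timedelta(days=d) for d in range(days_to_plot + 1)]
    minor_ticks = [start + timedelta(hours=24 / minors_per_day * i)
                   for i in range(days_to_plot * minors_per_day + 1)]
    return date_limits, major_ticks, minor_ticks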
Example #8
async def check_load_dailies(logger):
    """
    Check the daily log directory for new or updated files, then read and commit any new daily entries to the database.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import connect_to_db, get_all_data_files, core_dir, daily_logs_path, search_for_attr_value
    except ImportError as e:
        logger.error(f'ImportError occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite',
                                        core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in check_load_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_dailies()')

        daily_files_in_db = session.query(DailyFile).all()

        daily_files = [
            DailyFile(path)
            for path in get_all_data_files(daily_logs_path, '.txt')
        ]

        new_files = []

        for file in daily_files:
            file_in_db = search_for_attr_value(daily_files_in_db, 'path',
                                               file.path)

            if not file_in_db:
                new_files.append(file)
                logger.info(f'File {file.name} added for processing.')
            else:
                if file.size > file_in_db.size:
                    logger.info(
                        f'File {file_in_db.name} added to process additional data.'
                    )
                    new_files.append(file_in_db)

        if new_files:
            for file in new_files:
                dailies = read_daily_file(file.path)
                file_daily_dates = [d.date for d in file.entries]
                file.entries.extend(
                    [d for d in dailies if d.date not in file_daily_dates])
                file.size = file.path.stat().st_size
                session.merge(file)

            session.commit()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
Example #9
async def find_cal_events(logger):
    """
    Searches the existing data for unused calibration data and creates/commits CalEvents if possible.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running find_cal_events()')
    try:
        from summit_core import connect_to_db
        from summit_core import picarro_dir as rundir
        from summit_picarro import Base, Datum, CalEvent, mpv_converter, find_cal_indices
        from summit_picarro import log_event_quantification, filter_postcal_data
    except Exception as e:
        logger.error('ImportError occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        standard_data = {}
        for MPV in [2, 3, 4]:
            mpv_data = pd.DataFrame(
                session.query(
                    Datum.id,
                    Datum.date).filter(Datum.mpv_position == MPV).filter(
                        Datum.cal_id == None).all())
            # get only data for this switching valve position, and not already in any calibration event

            if not len(mpv_data):
                logger.info(
                    f'No new calibration events found for standard {mpv_converter[MPV]}'
                )
                continue

            mpv_data['date'] = pd.to_datetime(mpv_data['date'])
            # use mpv_converter dict to get standard information
            standard_data[mpv_converter[MPV]] = mpv_data.sort_values(
                by=['date']).reset_index(drop=True)

        for standard, data in standard_data.items():
            indices = find_cal_indices(data['date'])

            cal_events = []

            if not len(indices) and len(data):
                # if no indices were found but there is still calibration data, create a single event
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'])).all()
                cal_events.append(CalEvent(event_data, standard))

            elif not len(indices):
                # no cal indices at all; nothing new to create for this standard
                logger.info(
                    f'No new cal events were found for {standard} standard.')
                continue

            prev_ind = 0

            for num, ind in enumerate(
                    indices):  # get all data within this event
                event_data = session.query(Datum).filter(
                    Datum.id.in_(data['id'].iloc[prev_ind:ind])).all()
                cal_events.append(CalEvent(event_data, standard))

                if num == (
                        len(indices) - 1
                ):  # if it's the last index, get all ahead of it as the last event
                    event_data = session.query(Datum).filter(
                        Datum.id.in_(data['id'].iloc[ind:])).all()
                    cal_events.append(CalEvent(event_data, standard))

                prev_ind = ind

            for ev in cal_events:

                filter_postcal_data(
                    ev, session
                )  # flag the following minute as questionable data (inst_status = 999)

                if ev.date - ev.dates[0] < dt.timedelta(seconds=90):
                    logger.info(
                        f'CalEvent for date {ev.date} had a duration < 90s and was ignored.'
                    )
                    ev.standard_used = 'dump'  # give not-long-enough events standard type 'dump' so they're ignored
                    session.merge(ev)
                else:
                    for cpd in ['co', 'co2', 'ch4']:
                        ev.calc_result(
                            cpd, 21
                        )  # calculate results for all compounds going 21s back

                    session.merge(ev)
                    logger.info(f'CalEvent for date {ev.date} added.')
                    log_event_quantification(
                        logger, ev)  # show quantification info as DEBUG in log
            session.commit()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in find_cal_events()')
        send_processor_email(PROC, exception=e)
        return False
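find_cal_indices() is imported from summit_picarro and not shown in these examples. Judging from how it is used above (its return value slices the sorted date column into separate events), a rough sketch could split wherever the gap between consecutive timestamps is large; the 90-second threshold here is an assumption, not the project's value.

import pandas as pd


def find_cal_indices_sketch(dates, gap=pd.Timedelta(seconds=90)):
    """Return positions in a sorted datetime Series where a new cal event begins."""
    diffs = dates.diff()  # time between consecutive points; the first value is NaT
    return list(diffs[diffs > gap].index)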
Example #10
async def check_load_new_data(logger):
    """
    Checks for new files, checks length of old ones for updates, and processes/commits new data to the database.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running check_load_new_data()')

    try:
        from summit_core import picarro_logs_path as data_path
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db, get_all_data_files, check_filesize
        from summit_picarro import Base, DataFile, Datum
        from sqlalchemy.orm.exc import MultipleResultsFound
        from summit_errors import EmailTemplate, sender, processor_email_list

        from pandas.errors import ParserError
    except ImportError as e:
        logger.error('ImportError occurred in check_load_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} caused database connection to fail in check_load_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        db_files = session.query(DataFile)
        db_filenames = [d.name for d in db_files.all()]

        all_available_files = get_all_data_files(data_path, '.dat')

        files_to_process = session.query(DataFile).filter(
            DataFile.processed == False).all()

        for file in all_available_files:
            try:
                db_match = db_files.filter(
                    DataFile._name == file.name).one_or_none()
            except MultipleResultsFound:
                logger.warning(
                    f'Multiple results found for file {file.name}. The first was used.'
                )
                db_match = db_files.filter(DataFile._name == file.name).first()

            if file.name not in db_filenames:
                files_to_process.append(DataFile(file))
            elif check_filesize(file) > db_match.size:
                # if a matching file was found and it's now bigger, append for processing
                logger.info(
                    f'File {file.name} had more data and was added for processing.'
                )
                files_to_process.append(db_match)

        if not files_to_process:
            logger.warning('No new data was found.')
            return False

        for ind, file in enumerate(files_to_process):
            files_to_process[ind] = session.merge(
                file
            )  # merge files and return the merged object to overwrite the old
            logger.info(f'File {file.name} added for processing.')
        session.commit()

        for file in files_to_process:
            try:
                df = pd.read_csv(file.path, delim_whitespace=True)
            except EmptyDataError as e:
                logger.error(
                    f'Exception {e.args} occurred while reading {file.name}')
                send_processor_email(PROC, exception=e)
                continue
            except ParserError as e:
                logger.error(
                    f'Pandas ParserError occurred while reading {file.name}.')
                from summit_errors import send_processor_warning
                try:
                    df = pd.read_csv(file.path,
                                     delim_whitespace=True,
                                     error_bad_lines=False)
                    send_processor_warning(PROC, 'Dataframe', (
                        f'The Picarro Processor failed to read file {file.name}. '
                        'It was re-parsed, skipping unreadable lines, but should be'
                        ' investigated.'))

                except Exception as e:
                    logger.error(
                        f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                        + f' The file was {file.name}')
                    send_processor_email(PROC, exception=e)
                    continue
            except Exception as e:
                logger.error(
                    f'Exception {e.args} occurred in check_load_new_data() while reading a file.'
                    + f' The file was {file.name}')
                send_processor_email(PROC, exception=e)
                continue

            original_length = len(df)

            df.dropna(axis=0, how='any', inplace=True)

            new_length = len(df)
            diff = original_length - new_length

            if diff:
                logger.warning(
                    f'Dataframe contained {diff} null values in {file.name}.')
                from summit_errors import send_processor_warning

                send_processor_warning(PROC, 'DataFrame', (
                    f'The Picarro Processor cut {diff} lines from a dataframe after reading it.\n'
                    +
                    f'{file.name} should be investigated and cleaned if necessary.'
                ))

            # CO2 stays in ppm
            df['CO_sync'] *= 1000  # convert CO to ppb
            df['CH4_sync'] *= 1000  # convert CH4 to ppb
            df['CH4_dry_sync'] *= 1000

            df_list = df.to_dict('records')  # convert to list of dicts

            data_list = []
            for line in df_list:
                data_list.append(Datum(line))

            if data_list:
                data_dates = [d.date for d in data_list]
                dates_already_in_db = session.query(Datum.date).filter(
                    Datum.date.in_(data_dates)).all()
                dates_already_in_db[:] = [d.date for d in dates_already_in_db]

                for d in data_list:
                    if d.date not in dates_already_in_db:
                        d.file_id = file.id  # relate Datum to the file it originated in
                        session.add(d)
            else:
                logger.info(f'No new data created from file {file.name}.')

            file.processed = True
            file.size = check_filesize(file.path)
            logger.info(f'All data in file {file.name} processed.')
            session.commit()

        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in check_load_new_data().')
        send_processor_email(PROC, exception=e)
        return False
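check_filesize() comes from summit_core and is not shown here. Based on how it is used above (compared against a stored size and written back to DataFile.size), a plausible sketch is a safe wrapper around Path.stat():

from pathlib import Path


def check_filesize_sketch(filepath):
    """Return the size of filepath in bytes, or 0 if it cannot be read."""
    try:
        return Path(filepath).stat().st_size
    except OSError:
        return 0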
Example #11
async def check_load_pa_log(logger):
    """
    Read the PA log and create new PaLine objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    logger.info('Running check_load_pa_log()')

    try:
        from summit_core import methane_LOG_path as pa_filepath
        from summit_core import connect_to_db, check_filesize, core_dir, Config, split_into_sets_of_n
        from summit_methane import Base, read_pa_line, PaLine
        from summit_core import methane_dir as rundir
        from pathlib import Path
    except ImportError as e:
        logger.error('ImportError occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in check_load_pa_log()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        if check_filesize(pa_filepath) <= ch4_config.filesize:
            logger.info('PA file did not change size.')
            return False

        ch4_config.filesize = check_filesize(pa_filepath)
        core_session.merge(ch4_config)
        core_session.commit()

        line_to_start = ch4_config.pa_startline - 3  # pad start to avoid missing samples
        if line_to_start < 0:
            line_to_start = 0

        pa_file_contents = pa_filepath.read_text().split('\n')[line_to_start:]

        ch4_config.pa_startline = ch4_config.pa_startline + len(
            pa_file_contents) - 1

        pa_file_contents[:] = [line for line in pa_file_contents if line]

        pa_lines = []
        for line in pa_file_contents:
            pa_lines.append(read_pa_line(line))

        if not pa_lines:
            logger.info('No new PaLines found.')
            return False
        else:
            ct = 0  # count committed logs
            all_line_dates = [line.date for line in pa_lines]

            # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
            if len(all_line_dates) > 500:
                sets = split_into_sets_of_n(all_line_dates, 500)
            else:
                sets = [all_line_dates]
                # TODO: Can be reduced to just splitting, this step is done automatically by split_into.

            dates_already_in_db = []
            for date_set in sets:  # avoid shadowing the built-in set
                set_matches = session.query(PaLine.date).filter(
                    PaLine.date.in_(date_set)).all()
                set_matches[:] = [s.date for s in set_matches]
                dates_already_in_db.extend(set_matches)

            for line in pa_lines:
                if line.date not in dates_already_in_db:
                    session.add(line)
                    logger.info(f'PaLine for {line.date} added.')
                    ct += 1

            if ct == 0:
                logger.info('No new PaLines found.')
            else:
                logger.info(f'{ct} PaLines added.')
                session.commit()

        core_session.merge(ch4_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in check_load_pa_log()')
        send_processor_email(PROC, exception=e)
        return False
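split_into_sets_of_n() is imported from summit_core and not shown here; it exists so that IN (...) clauses stay under SQLite's roughly 1000-variable limit. A minimal chunking sketch, as an assumption about its behavior:

def split_into_sets_of_n_sketch(items, n):
    """Yield successive chunks of at most n items from a list."""
    for i in range(0, len(items), n):
        yield items[i:i + n]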
Example #12
def main():
    """
    This function creates calibration events with a valve position of 5, and places them in the
    Picarro Database for future analysis.

    :param N/A: perhaps a logger, once incorporated into the full code
    :return: boolean, did it run?

    !! Note: To view the plots, comment out line 36 and set a breakpoint directly after plt.show() on line 131
    """

    # Import Required Functions
    from summit_core import connect_to_db
    from summit_picarro import Base, Datum, CalEvent, find_cal_indices
    from summit_picarro import filter_postcal_data
    from matplotlib.pyplot import figure

    # Connect to the database
    rundir = r'C:\Users\ARL\Desktop'  # location of DB
    engine, session = connect_to_db('sqlite:///JASHAN_summit_picarro.sqlite',
                                    rundir)  # Create eng & sess
    Base.metadata.create_all(engine)  # Create base

    # Get any data with a valve position of 5
    standard_data = {}
    MPV = 5
    mpv_data = pd.DataFrame(
        session.query(Datum.id, Datum.date)  # Gets the datum ID & Date
        .filter(Datum.mpv_position == MPV)  # Filters them for valve pos #5
        .filter(Datum.cal_id == None)  # only if not already any cal event
        .all())  # actually gathers the data

    mpv_data['date'] = pd.to_datetime(
        mpv_data['date'])  # Convert to PD datetime version

    mpv_converter = {
        5: 'ch4_GC_std'
    }  # TODO: Incorporate in larger project code
    standard_data[mpv_converter[MPV]] = mpv_data.sort_values(
        by=['date']).reset_index(drop=True)

    # Create a calc_event with the given name of this standard
    for standard, data in standard_data.items():
        indices = find_cal_indices(
            data['date'])  # Gathers the indices of each new cal event

        cal_events = []  # preallocation of cal events
        prev_ind = 0  # prev_ind is initially the first index

        # If indices is empty, but there is still data, create a single event
        if not len(indices) and len(data):
            event_data = (session.query(Datum).filter(Datum.id.in_(
                data['id'])).all())
            cal_events.append(CalEvent(event_data, standard))

        # Separate cal events from the gathered indices and place them in cal_events
        for num, ind in enumerate(indices):
            event_data = (
                session.query(Datum)  # Searches through all Datums
                .filter(Datum.id.in_(data['id'].iloc[prev_ind:ind])
                        )  # Data between this index and the previous
                .all())  # actually gathers the data
            cal_events.append(CalEvent(event_data,
                                       standard))  # appends cal events list

            if num == (len(indices) - 1):  # last index, gets the rest
                event_data = (
                    session.query(Datum).filter(
                        Datum.id.in_(
                            data['id'].iloc[ind:]))  # index to end of list
                    .all())
                cal_events.append(CalEvent(
                    event_data, standard))  # appends cal events list

                # Create a plot of this cal event
                coPlot, co2Plot, ch4Plot, datePlot = [], [], [], []
                for x in event_data:
                    coPlot.append(x.co)
                    co2Plot.append(x.co2)
                    ch4Plot.append(x.ch4)
                    # Raw timestamps; later points are offset from the first
                    if datePlot == []:
                        timestep = x.date.timestamp()
                        datePlot.append(timestep)
                    else:
                        # offset from the first timestamp; there is likely a better way to do this
                        timestep = x.date.timestamp() - datePlot[0]
                        datePlot.append(timestep)

                ev = cal_events[len(indices)]
                for cpd in ['co', 'co2', 'ch4']:
                    time = (ev.date - ev.dates[0]).seconds
                    ev.calc_result(cpd, time)

                datePlot[0] = 0  # start it at 0

                figure(1)
                table_vals = [[list(ev.co_result.values())[0]],
                              [list(ev.co_result.values())[1]],
                              [list(ev.co_result.values())[2]]]
                the_table = plt.table(cellText=table_vals,
                                      cellColours=None,
                                      cellLoc='right',
                                      colWidths=[0.3] * 3,
                                      rowLabels=['mean', 'median', 'stdev'],
                                      rowColours=None,
                                      rowLoc='left',
                                      colLabels=['value'],
                                      colColours=None,
                                      colLoc='center',
                                      loc='lower right',
                                      bbox=None)
                plt.plot(datePlot, coPlot, label='co')
                plt.xlabel('Time since start of cal_event [seconds]')
                plt.ylabel('Compounds')
                plt.title('CO')

                figure(2)
                table_vals = [[list(ev.co2_result.values())[0]],
                              [list(ev.co2_result.values())[1]],
                              [list(ev.co2_result.values())[2]]]
                the_table = plt.table(cellText=table_vals,
                                      cellColours=None,
                                      cellLoc='right',
                                      colWidths=[0.3] * 3,
                                      rowLabels=['mean', 'median', 'stdev'],
                                      rowColours=None,
                                      rowLoc='left',
                                      colLabels=['value'],
                                      colColours=None,
                                      colLoc='center',
                                      loc='upper right',
                                      bbox=None)
                plt.plot(datePlot, co2Plot, label='co2')
                plt.xlabel('Time since start of cal_event [seconds]')
                plt.ylabel('Compounds')
                plt.title('CO2')

                figure(3)
                table_vals = [[list(ev.ch4_result.values())[0]],
                              [list(ev.ch4_result.values())[1]],
                              [list(ev.ch4_result.values())[2]]]
                the_table = plt.table(cellText=table_vals,
                                      cellColours=None,
                                      cellLoc='right',
                                      colWidths=[0.3] * 3,
                                      rowLabels=['mean', 'median', 'stdev'],
                                      rowColours=None,
                                      rowLoc='left',
                                      colLabels=['value'],
                                      colColours=None,
                                      colLoc='center',
                                      loc='upper right',
                                      bbox=None)
                plt.plot(datePlot, ch4Plot, label='ch4')
                plt.xlabel('Time since start of cal_event [seconds]')
                plt.ylabel('Compounds')
                plt.title('ch4')

                plt.show()

            prev_ind = ind  # set previous index as current

        # Calculate the CO, CO2, and Methane results with Brendan's functions
        for ev in cal_events:
            filter_postcal_data(
                ev, session)  # filter following min of ambient data

            if ev.date - ev.dates[0] < dt.timedelta(
                    seconds=90):  # events under 90 seconds are dumped
                ev.standard_used = 'dump'  # assign dump name
                session.merge(ev)  # merge results with session
            # otherwise, iterate over each compound and calculate results
            else:
                for cpd in ['co', 'co2', 'ch4']:
                    time = 21
                    ev.calc_result(
                        cpd, time)  # results are calced (time) seconds back

                session.merge(ev)  # merge results with session

        # Save to your local copy of the database & check results
        session.commit()  # commit results to session

    # Create a timeseries of the results to ascertain what portion of the data we want to keep

    # Integrate with Brendan's code once tested for errors

    return True
Example #13
async def check_load_run_logs(logger):
    """
    Read new log files and create new GcRun and Sample objects if possible.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_logs_path
        from summit_core import methane_dir as rundir
        from summit_core import get_all_data_files, connect_to_db
        from summit_methane import Base, GcRun, Sample, read_log_file
    except ImportError as e:
        logger.error('ImportError occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_load_run_logs()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running check_load_run_logs()')
        runs_in_db = session.query(GcRun).all()
        samples = session.query(Sample)
        sample_count = samples.count()

        run_dates = [r.date for r in runs_in_db]

        files = get_all_data_files(methane_logs_path, '.txt')

        runs = []
        for file in files:
            runs.append(read_log_file(file))

        new_run_count = 0  # count runs added
        for run in runs:
            if run.date not in run_dates:
                session.add(run)
                logger.info(f'GcRun for {run.date} added.')
                new_run_count += 1

        if not new_run_count:
            logger.info('No new GcRuns added.')
        else:
            session.commit()
            new_sample_count = session.query(Sample).count() - sample_count
            logger.info(
                f'{new_run_count} GcRuns added, containing {new_sample_count} Samples.'
            )

            if new_run_count * 10 != new_sample_count:
                logger.warning(
                    'There were not ten Samples per GcRun as expected.')

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()

        logger.error(f'Exception {e.args} occurred in check_load_run_logs()')
        send_processor_email(PROC, exception=e)
        return False
Example #14
async def read_excel_sheet(logger):
    logger.info('Running read_excel_sheet()')

    try:
        import pandas as pd
        from datetime import datetime

        from summit_core import methane_dir as rundir
        from summit_errors import send_processor_warning

        from summit_methane import GcRun, Base, add_formulas_and_format_sheet
        from summit_core import Config, connect_to_db, append_df_to_excel
        from summit_core import methane_dir, core_dir, data_file_paths

        methane_sheet = data_file_paths.get('methane_sheet', None)

        if not methane_sheet:
            logger.error(
                'Filepath for the methane integration sheet could not be retrieved.'
            )
            send_processor_warning(
                PROC, 'Filepath Error',
                '''The methane integration sheet filepath could not be retrieved. It should be listed
                                   as "methane_sheet" in file_locations.json in the core folder.'''
            )
            return False

    except ImportError as e:
        logger.error('ImportError occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Config.__table__.create(core_engine, checkfirst=True)

        methane_sheet_read_config = (core_session.query(Config).filter(
            Config.processor == 'methane_sheet_read').one_or_none())

        if not methane_sheet_read_config:
            methane_sheet_read_config = Config(processor='methane_sheet_read')
            # use all default values except processor on init
            core_session.add(methane_sheet_read_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in read_excel_sheet()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:

        core_session.merge(methane_sheet_read_config)
        core_session.commit()

        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        session.close()
        engine.dispose()
        core_session.close()
        core_engine.dispose()
        logger.error(f'Exception {e.args} occurred in read_excel_sheet()')
        send_processor_email(PROC, exception=e)
        return False
Example #15
statements for the changing parameter bounds based on the state of the H2O Trap and the Absorbent Trap. However,
the code runs slower, and currently every single log in the database has the default state for the traps,
so I've decided to keep this one as an alternate and let you choose whether there's a better way to implement this
feature without losing performance.
"""

import datetime as dt
from pathlib import Path
from datetime import datetime
import summit_core
from summit_core import connect_to_db, TempDir, Config
from summit_core import voc_dir, core_dir
from summit_voc import LogFile
import pandas as pd

engine, session = connect_to_db('sqlite:///Jsummit_voc.sqlite', voc_dir)

core_engine, core_session = connect_to_db('sqlite:///Jsummit_core.sqlite',
                                          core_dir)
Config.__table__.create(core_engine, checkfirst=True)

logcheck_config = core_session.query(Config).filter(
    Config.processor == 'Log Checking').one_or_none()

if not logcheck_config:
    logcheck_config = Config(
        processor='Log Checking',
        days_to_plot=21)  # use all default values except processor on init
    core_session.add(logcheck_config)
    core_session.commit()
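Config is defined in summit_core and its model is not included in these examples. Based only on the attributes the examples touch (processor, filesize, pa_startline, last_data_date, days_to_plot), a hypothetical SQLAlchemy sketch might look like the following; the column types and defaults are assumptions, not the project's actual definition.

from datetime import datetime

from sqlalchemy import Column, DateTime, Integer, String
from sqlalchemy.ext.declarative import declarative_base

SketchBase = declarative_base()


class ConfigSketch(SketchBase):
    """Hypothetical stand-in for summit_core.Config: per-processor state."""
    __tablename__ = 'config'

    id = Column(Integer, primary_key=True)
    processor = Column(String, unique=True)
    filesize = Column(Integer, default=0)  # last seen size of the processor's input file
    pa_startline = Column(Integer, default=0)  # next line to read in the PA log
    last_data_date = Column(DateTime, default=datetime(1900, 1, 1))
    days_to_plot = Column(Integer, default=7)  # assumed default; Example #15 passes 21 explicitly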
Example #16
def retrieve_new_files(logger):
    from summit_core import connect_to_db, list_files_recur, split_into_sets_of_n

    logger.info('Running retrieve_new_files()')

    con = connect_to_sftp()
    engine, session = connect_to_db('sqlite:///zugspitze.sqlite', CORE_DIR)

    for path in ['folder1', 'folder2', 'folder3']:
        logger.info(f'Processing {path} files.')

        local_path = CORE_DIR / path
        remote_path = REMOTE_BASE_PATH + f'/{path}'

        all_remote_files = list_remote_files_recur(
            con, remote_path)  # get a list of all SFTPAttributes + paths

        all_local_files = [str(p) for p in list_files_recur(local_path)
                           ]  # get all local file paths

        new_remote_files = []
        for remote_file in all_remote_files:
            new_remote_files.append(
                RemoteFile(remote_file.st_mtime, remote_file.path))
        # create DB objects for all remote paths

        new_local_files = []
        for local_file in all_local_files:
            new_local_files.append(
                LocalFile(os.stat(local_file).st_mtime, local_file))
        # create DB objects for all local paths

        remote_sets = split_into_sets_of_n(
            [r.path for r in new_remote_files],
            750)  # don't exceed 1K sqlite var limit
        local_sets = split_into_sets_of_n([l.path for l in new_local_files],
                                          750)

        # loop through remote, then local filesets to check against DB and commit any new ones
        for Filetype, filesets, new_files in zip(
            [RemoteFile, LocalFile], [remote_sets, local_sets],
            [new_remote_files, new_local_files]):
            paths_in_db = []
            for set_ in filesets:
                in_db = session.query(Filetype.path).filter(
                    Filetype.path.in_(set_)).all()
                if in_db:
                    paths_in_db.extend(in_db)

            for file in new_files:
                if file.path in paths_in_db:
                    file_in_db = session.query(Filetype).filter(
                        Filetype.path == file.path).one_or_none()
                    if file.st_mtime > file_in_db.st_mtime:
                        file_in_db.st_mtime = file.st_mtime
                        session.merge(file_in_db)
                else:
                    session.add(file)
            session.commit()  # commit at the end of each filetype

        # local and remote files are now completely up-to-date in the database
        files_to_retrieve = []
        remote_files = session.query(RemoteFile).order_by(
            RemoteFile.relpath).all()
        local_files = session.query(LocalFile).order_by(
            LocalFile.relpath).all()

        for remote_file in remote_files:
            if remote_file.local is None:
                local_match = search_for_attr_value(local_files, 'relpath',
                                                    remote_file.relpath)
                if local_match:
                    remote_file.local = local_match
                    if remote_file.st_mtime > local_match.st_mtime:
                        files_to_retrieve.append(
                            remote_file
                        )  # add the remote file to download if st_mtime is greater
                else:
                    files_to_retrieve.append(
                        remote_file
                    )  # add the remote file if there's no local copy (create later)
            else:
                if remote_file.st_mtime > remote_file.local.st_mtime:
                    files_to_retrieve.append(remote_file)

        logger.info(f'Remote files: {len(remote_files)}')
        logger.info(f'Local files: {len(local_files)}')
        logger.info(
            f'{len(files_to_retrieve)} files need updating or retrieval.')

        ct = 0
        for remote_file in files_to_retrieve:
            if remote_file.local is not None:
                con.get(remote_file.path, remote_file.local.path
                        )  # get remote file and put in the local's path

                remote_file.local.st_mtime = remote_file.st_mtime  # update, then merge
                session.merge(remote_file)

                logger.info(f'Remote file {remote_file.relpath} was updated.')
                ct += 1
            else:
                new_local_path = CORE_DIR / remote_file.relpath.lstrip('/')

                scan_and_create_dir_tree(
                    new_local_path
                )  # scan the path and create any needed folders

                new_local_path = str(new_local_path)  # revert to string
                con.get(
                    remote_file.path,
                    new_local_path)  # get the file and put it in its relative place

                new_local = LocalFile(remote_file.st_mtime, new_local_path)
                new_local.remote = remote_file

                session.add(
                    new_local
                )  # create, relate, and add the local file that was transferred
                session.merge(remote_file)

                logger.info(
                    f'Remote file {remote_file.relpath} was retrieved and added to local database.'
                )
                ct += 1

            if ct % 100 == 0:
                session.commit()  # routinely commit files in batches of 100

        session.commit()

        session.close()
        engine.dispose()
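
The 750-path batches above exist because SQLite rejects statements with more than roughly 1,000 bound variables. A minimal sketch of a chunking helper in the spirit of summit_core.split_into_sets_of_n (an assumption; the real helper may differ):

def split_into_sets_of_n(lst, n):
    # sketch only: chunk a list into consecutive sublists of at most n items
    return [lst[i:i + n] for i in range(0, len(lst), n)]

# e.g. split_into_sets_of_n(list(range(7)), 3) -> [[0, 1, 2], [3, 4, 5], [6]]
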
Example #17
0
async def load_excel_corrections(logger):

    try:
        import pandas as pd
        from pathlib import Path
        from summit_voc import Peak, LogFile, NmhcLine, NmhcCorrection, GcRun
        from summit_core import connect_to_db, search_for_attr_value
        from summit_core import voc_dir as rundir
    except ImportError as e:
        logger.error('ImportError occurred in load_excel_corrections()')
        return False

    data = pd.read_excel('Ambient_2019.xlsx', header=None,
                         usecols=check_cols).dropna(axis=1, how='all')

    data = data.set_index([0])  # set first row of df to the index
    data.index = data.index.str.lower()
    data = data[data.columns[:-1]]  # drop the last column of the DF (the one with 'END' in it)

    engine, session = connect_to_db('sqlite:///summit_voc.sqlite', rundir)

    logfiles = session.query(LogFile).order_by(LogFile.samplecode)
    nmhc_lines = session.query(NmhcLine).filter(
        NmhcLine.correction_id == None).order_by(NmhcLine.id)
    gc_runs = session.query(GcRun).order_by(GcRun.id)

    nmhc_corrections = []

    corrections_in_db = session.query(NmhcCorrection).all()
    correction_dates_in_db = [c.date for c in corrections_in_db]

    with session.no_autoflush:
        for col_name in data.columns.tolist():
            col = data.loc[:, col_name]
            nmhc_corrections.append(
                correction_from_df_column(col, logfiles, nmhc_lines, gc_runs,
                                          logger))

    for correction in nmhc_corrections:
        if correction:
            if correction.date not in correction_dates_in_db:
                session.add(correction)
                logger.info(f'Correction for {correction.date} added.')

    session.commit()

    nmhc_corrections = session.query(NmhcCorrection).filter(
        NmhcCorrection.status == 'unapplied').all()
    # re-get all added corrections that haven't been applied

    for correction in nmhc_corrections:
        if correction:
            line = session.query(NmhcLine).filter(
                NmhcLine.correction_id == correction.id).one_or_none()

            if not line:
                logger.info(
                    f'A matching line for NmhcCorrection {correction} was not found.'
                )
                continue
        else:
            continue

        for peak_corr in correction.peaklist:
            peak_by_name = search_for_attr_value(line.peaklist, 'name',
                                                 peak_corr.name)
            peak_by_rt = search_for_attr_value(line.peaklist, 'rt',
                                               peak_corr.rt)

            if (peak_by_name and peak_by_rt) and (
                    peak_by_name is
                    peak_by_rt):  # if they're not None, and identical
                peak = peak_by_name

            else:
                if peak_by_name and peak_by_rt:  # if both exist, but not identical, prefer the RT-found one
                    peak_by_name.name = '-'
                    peak_by_rt.name = peak_corr.name
                    peak = peak_by_rt
                    session.merge(peak)
                    session.merge(peak_by_name)

                elif peak_by_name:
                    peak = peak_by_name
                    session.merge(peak)

                elif peak_by_rt:
                    peak = peak_by_rt
                    peak.name = peak_corr.name
                    session.merge(peak)

                else:
                    logger.warning(
                        f"Peak with name {peak_corr.name} or retention time of {peak_corr.rt} from "
                        +
                        f"NmhcCorrection {correction.date} not found in NmhcLine for {line.date}"
                    )
                    continue

            if peak.pa != peak_corr.pa:
                peak.pa = peak_corr.pa
                peak.rt = peak_corr.rt
                peak.rev = peak.rev + 1  # Sqlite *does not* like using += notation

        correction.status = 'applied'

        line.nmhc_corr_con = correction
        correction.correction_id = line

        session.merge(correction)
        session.merge(line)
        logger.info(f'Successful peak corrections made to {line.date}')
        session.commit()
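
The correction loop above resolves each corrected peak against the existing NmhcLine twice, once by name and once by retention time, via summit_core.search_for_attr_value. A plausible sketch of that helper, assuming it simply returns the first object whose attribute matches, or None:

def search_for_attr_value(obj_list, attr, value):
    # sketch only: first object in obj_list with getattr(obj, attr) == value, else None
    return next((obj for obj in obj_list if getattr(obj, attr, None) == value), None)
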
Example #18
0
def brendan_test():
    from datetime import datetime
    import pandas as pd

    from summit_core import connect_to_db, merge_lists, search_for_attr_value
    from summit_core import methane_dir
    from summit_methane import SampleCorrection, Base, GcRun

    filename = r'Z:\Data\Summit_GC\Summit_GC_2019\CH4_results\Methane_Automated_2019.xlsx'
    #filename = r'/home/brendan/PycharmProjects/Summit/processors/summit_methane_processor/SUM_CH4_insitu_2019.xlsx'
    # Brendan's path

    year = filename.split('.')[-2][-4:]  # risky...

    engine, session = connect_to_db('sqlite:///summit_methane_tester.sqlite',
                                    methane_dir)
    Base.metadata.create_all(engine)

    data = pd.read_excel(filename, sheet_name='Sheet1')

    indices = data['date'].dropna(how='all').index.tolist()

    for ind in indices:
        if ind % 5 != 0:
            date = data.loc[ind, 'date']
            filename = data.loc[ind, 'filename']
            print(
                f'File {filename} for run {date} did not have the proper number of lines to analyze.'
            )  # can't happen

    indices = [i for i in indices if i % 5 == 0]  # remove any failed indices after the warning above

    gc_runs = session.query(GcRun)

    ct = 0
    for ind in indices:
        run_date = data.loc[ind, 'date'].to_pydatetime()

        matched_run = gc_runs.filter(GcRun.date == run_date).one_or_none()

        if not matched_run:
            print(f'No run matched for {run_date}.')
            continue  # for now...

        run_set = data.loc[ind:ind + 6, ['peak1', 'peak2']].dropna(axis=1,
                                                                   how='all')

        if not run_set.columns.tolist():
            # the peak1/peak2 subset is empty after dropping columns that are entirely NaN
            # print('WARNING - LOG ME')
            continue

        peaks1 = run_set['peak1'].values.tolist()  # column of peaks, ordered 1,3,5,7,9
        peaks2 = run_set['peak2'].values.tolist()  # column of peaks, ordered 2,4,6,8,10

        ordered_peaks = merge_lists(
            peaks1, peaks2)  # returns peaks in order [0,1,2,3,4, ..., 9]

        corrections = []
        for num, pa in enumerate(ordered_peaks):
            """
            Finding samples in db:
            Use DOY, hour to find the run, then use run.id to get samples iteratively,
                if sample of x num does not exist, warn/ log an error (should have been created when reading log)
            """

            matched_sample = search_for_attr_value(matched_run.samples,
                                                   'sample_num', num)

            if not matched_sample:
                print(
                    f'Matched sample not found for sample number {num} in GcRun for {matched_run.date}.'
                )
                continue

            corrections.append(SampleCorrection(num, pa, matched_sample))

        # for sample in corrections:
        #     print(sample)
        #     print(sample.sample_num)
        #     print(sample.pa)

        for corr in corrections:
            # TODO: Check for already present in DB
            session.merge(corr)

        ct += 1

        if ct > 50:
            continue

    # get number of data points for each day/hour combo

    # print(counts.where(counts != 5).dropna(how='all', axis='rows'))
    # warn these exist, they can't be safely interpreted

    session.commit()
    session.close()
    engine.dispose()
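
The comments above imply that summit_core.merge_lists interleaves the two peak columns back into injection order. A minimal interleaving sketch consistent with that usage (an assumption, not the actual implementation):

def merge_lists(list_a, list_b):
    # sketch only: interleave two equal-length lists -> [a0, b0, a1, b1, ...]
    merged = []
    for a, b in zip(list_a, list_b):
        merged.extend((a, b))
    return merged

# e.g. merge_lists([1, 3, 5], [2, 4, 6]) -> [1, 2, 3, 4, 5, 6]
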
Example #19
0
async def match_peaks_to_samples(logger):
    """
    All detected peaks in a run are attached to NmhcLines, but are not linked to Samples until they've passed certain
    criteria.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, split_into_sets_of_n
        from summit_methane import Peak, Sample, GcRun, Base, sample_rts
        from operator import attrgetter
        import datetime as dt
    except ImportError as e:
        logger.error('ImportError occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in match_peaks_to_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running match_peaks_to_samples()')

        unmatched_samples = session.query(Sample).filter(
            Sample.peak_id == None, Sample.run_id != None).all()

        whole_set = list({s.run_id for s in unmatched_samples})
        # SQLite can't take in clauses with > 1000 variables, so chunk to sets of 500
        if len(whole_set) > 500:  # subdivide set
            sets = split_into_sets_of_n(whole_set, 500)
        else:
            sets = [whole_set]  # TODO: can be reduced to just splitting; split_into_sets_of_n handles short lists itself

        runs_w_unmatched_samples = []
        for set_ in sets:  # use set_ to avoid shadowing the built-in set
            runs_w_unmatched_samples.extend(
                session.query(GcRun).filter(GcRun.id.in_(set_)).all()
            )  # create the set of runs that require processing

        for run in runs_w_unmatched_samples:
            # loop through runs containing samples that haven't been matched with peaks
            samples = session.query(Sample).filter(
                Sample.run_id == run.id).all()
            peaks = session.query(Peak).filter(
                Peak.pa_line_id == run.pa_line_id)

            for sample in samples:
                sn = sample.sample_num
                potential_peaks = peaks.filter(
                    Peak.rt.between(sample_rts[sn][0],
                                    sample_rts[sn][1])).all()
                # filter for peaks in this gc run between the expected retention times given in sample_rts

                if len(potential_peaks):
                    # currently, the criteria for "this is the real peak" is "this is the biggest peak"
                    peak = max(potential_peaks, key=attrgetter('pa'))
                    if peak:
                        sample.peak = peak
                        peak.name = 'CH4_' + str(sample.sample_num)
                        sample.date = run.pa_line.date + dt.timedelta(
                            minutes=peak.rt - 1)
                        session.merge(sample)

        session.commit()
        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in match_peaks_to_samples()')
        send_processor_email(PROC, exception=e)
        return False
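
The peak-selection rule above is simply "largest peak area wins" among peaks whose retention times fall in the expected window for that sample slot. A standalone illustration using a hypothetical stand-in for the Peak model:

from operator import attrgetter
from collections import namedtuple

FakePeak = namedtuple('FakePeak', ['rt', 'pa'])  # hypothetical stand-in, not the summit_methane Peak

candidates = [FakePeak(rt=2.1, pa=450.0), FakePeak(rt=2.3, pa=512.5), FakePeak(rt=2.2, pa=30.1)]
rt_window = (2.0, 2.5)  # expected retention-time window for this sample number

in_window = [p for p in candidates if rt_window[0] <= p.rt <= rt_window[1]]
best = max(in_window, key=attrgetter('pa')) if in_window else None
print(best)  # FakePeak(rt=2.3, pa=512.5)
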
Example #20
0
async def create_mastercals(logger):
    """
    Searches all un-committed CalEvents, looking for (high, middle, low) sets that can then have a curve and
    other stats calculated. It will report them as DEBUG items in the log.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running create_mastercals()')

    try:
        from summit_core import picarro_dir as rundir
        from summit_core import connect_to_db
        from summit_picarro import MasterCal, CalEvent, match_cals_by_min
        import matplotlib.pyplot as plt
        import seaborn as sns
        import numpy as np
    except Exception as e:
        logger.error('ImportError occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to database in create_mastercals()'
        )
        send_processor_email(PROC, exception=e)
        return False
    try:
        # Get cals by standard, but only if they're not in another MasterCal already
        lowcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'low_std').all())

        highcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'high_std').all())

        midcals = (session.query(CalEvent).filter(
            CalEvent.mastercal_id == None,
            CalEvent.standard_used == 'mid_std').all())

        mastercals = []
        for lowcal in lowcals:
            matching_high = match_cals_by_min(lowcal, highcals, minutes=5)

            if matching_high:
                matching_mid = match_cals_by_min(matching_high,
                                                 midcals,
                                                 minutes=5)

                if matching_mid:
                    mastercals.append(
                        MasterCal([lowcal, matching_high, matching_mid]))

        if mastercals:
            for mc in mastercals:
                # calculate curve from low - high point, and check middle distance
                mc.create_curve()
                session.add(mc)
                logger.info(f'MasterCal for {mc.subcals[0].date} created.')

            session.commit()
            return True

        else:
            logger.info('No MasterCals were created.')
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in create_mastercals()')
        send_processor_email(PROC, exception=e)
        return False
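
match_cals_by_min pairs a CalEvent with a candidate whose date falls within the given number of minutes. A rough sketch of that idea, assuming each event exposes a .date attribute (not the actual summit_picarro implementation):

from datetime import timedelta

def match_cals_by_min(cal, candidates, minutes=5):
    # sketch only: return the candidate closest in time to cal within +/- minutes, else None
    window = timedelta(minutes=minutes)
    in_window = [c for c in candidates if abs(c.date - cal.date) <= window]
    return min(in_window, key=lambda c: abs(c.date - cal.date)) if in_window else None
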
Example #21
0
async def quantify_samples(logger):
    """
    On a per-run basis, use std1 to calc samples 1-5 (~3) and std2 to calculate samples 6-10 (~8). Output warnings
    if only one standard in a sample is valid.

    :param logger: logger, to log events to
    :return: Boolean, True if successful
    """

    try:
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, search_for_attr_value
        from summit_methane import Standard, GcRun, Base
        from summit_methane import calc_ch4_mr, valid_sample
    except Exception as e:
        logger.error('ImportError occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in quantify_samples()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running quantify_samples()')

        unquantified_runs = session.query(GcRun).filter(
            GcRun.median == None).all()

        ct = 0
        for run in unquantified_runs:

            # TODO: Move the majority of this to class methods for GcRuns; will make editing integrations WAY easier
            samples = run.samples

            standard = (
                session.query(Standard).filter(
                    run.date >= Standard.date_st,
                    run.date < Standard.date_en).first()
            )  # TODO; Set unique constraints on standards, revert to one_or_none()

            if standard is not None:
                ambients = [
                    sample for sample in samples
                    if (sample.sample_type == 3 and valid_sample(sample))
                ]
                standard1 = search_for_attr_value(samples, 'sample_num', 2)
                standard2 = search_for_attr_value(samples, 'sample_num', 7)

                if not ambients:
                    logger.warning(
                        f'No ambient samples were quantifiable in GcRun for {run.date}'
                    )
                    continue

                if (not valid_sample(standard1)) and (
                        not valid_sample(standard2)):
                    logger.warning(
                        f'No valid standard samples found in GcRun for {run.date}.'
                    )
                    continue

                elif not valid_sample(standard1):
                    # use std2 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard2, standard)

                elif not valid_sample(standard2):
                    # use std1 for all ambient quantifications
                    logger.info(
                        f'Only one standard used for samples in GcRun for {run.date}'
                    )
                    for amb in ambients:
                        amb = calc_ch4_mr(amb, standard1, standard)

                else:
                    # use std1 for ambients 0-4 and std2 for ambients 5-9
                    for amb in ambients:
                        if amb.sample_num < 5:
                            amb = calc_ch4_mr(amb, standard1, standard)
                        else:
                            amb = calc_ch4_mr(amb, standard2, standard)

                    run.standard_rsd = (
                        s.stdev([standard1.peak.pa, standard2.peak.pa]) /
                        s.median([standard1.peak.pa, standard2.peak.pa]))

                from summit_methane import plottable_sample

                all_run_mrs = [
                    amb.peak.mr for amb in ambients if plottable_sample(amb)
                ]
                # do basic filtering for calculating run medians
                if all_run_mrs:
                    run.median = s.median(all_run_mrs)
                    if len(all_run_mrs) > 1:
                        run.rsd = s.stdev(all_run_mrs) / run.median

                session.merge(run)
                # merge only the run, it contains and cascades samples, palines and peaks that were changed
                ct += 1

            else:
                logger.warning(
                    f'No standard value found for GcRun at {run.date}.')

        session.commit()

        if ct:
            logger.info(f'{ct} GcRuns were successfully quantified.')
            session.close()
            engine.dispose()
            return True
        else:
            logger.info('No GcRuns quantified.')
            session.close()
            engine.dispose()
            return False

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in quantify_samples()')
        send_processor_email(PROC, exception=e)
        return False
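
calc_ch4_mr amounts to a single-point ratio quantification: the ambient peak area is scaled by the certified value of the working standard relative to the bracketing standard injection's peak area. A schematic version; the attribute names (standard.mr in particular) are assumptions for illustration only:

def calc_ch4_mr(sample, standard_sample, standard):
    # sketch only: mixing ratio = certified standard value * (sample PA / standard injection PA)
    sample.peak.mr = standard.mr * (sample.peak.pa / standard_sample.peak.pa)
    return sample
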
Example #22
0
async def plot_new_data(logger):
    """
    Checks data against the last plotting time, and creates new plots for CO, CO2, and CH4 if new data exists.

    :param logger: logging logger at module level
    :return: boolean, did it run/process new data?
    """

    logger.info('Running plot_new_data()')

    try:
        from pathlib import Path
        from summit_core import picarro_dir as rundir
        from summit_core import create_daily_ticks, connect_to_db, TempDir, Plot, core_dir, Config, add_or_ignore_plot
        from summit_picarro import Base, Datum, summit_picarro_plot

        plotdir = rundir / 'plots'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except Exception as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_picarro.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        picarro_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not picarro_config:
            picarro_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(picarro_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        newest_data_point = (session.query(Datum.date).filter(
            Datum.mpv_position == 1).order_by(Datum.date.desc()).first()[0])

        if newest_data_point <= picarro_config.last_data_date:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        picarro_config.last_data_date = newest_data_point
        core_session.add(picarro_config)

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            picarro_config.days_to_plot)

        all_data = (
            session.query(Datum.date, Datum.co, Datum.co2, Datum.ch4).filter((
                Datum.mpv_position == 0) | (Datum.mpv_position == 1)).filter(
                    (Datum.instrument_status == 963),
                    (Datum.alarm_status == 0)).filter(
                        Datum.date >= date_limits['left']
                    )  # grab only data that falls in plotting period
            .all())

        if not all_data:
            logger.info('No new data was found to plot.')
            core_session.close()
            core_engine.dispose()
            session.close()
            engine.dispose()
            return False

        # get only ambient data
        dates = []
        co = []
        co2 = []
        ch4 = []
        for result in all_data:
            dates.append(result.date)
            co.append(result.co)
            co2.append(result.co2)
            ch4.append(result.ch4)

        with TempDir(plotdir):

            from summit_core import five_minute_medians
            dates_co, co = five_minute_medians(dates, co)

            name = summit_picarro_plot(None, ({
                'Summit CO': [dates_co, co]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 60,
                                           'top': 180
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            co_plot = Plot(plotdir / name, remotedir,
                           True)  # stage plots to be uploaded
            add_or_ignore_plot(co_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit CO2': [dates, co2]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 400,
                                           'top': 420
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks,
                                       unit_string='ppmv')

            co2_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(co2_plot, core_session)

            name = summit_picarro_plot(None, ({
                'Summit Methane [Picarro]': [dates, ch4]
            }),
                                       limits={
                                           'right':
                                           date_limits.get('right', None),
                                           'left':
                                           date_limits.get('left', None),
                                           'bottom': 1850,
                                           'top': 2050
                                       },
                                       major_ticks=major_ticks,
                                       minor_ticks=minor_ticks)

            ch4_plot = Plot(plotdir / name, remotedir,
                            True)  # stage plots to be uploaded
            add_or_ignore_plot(ch4_plot, core_session)

        logger.info('New data plots were created.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True
    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)

        session.close()
        engine.dispose()

        core_session.close()
        core_engine.dispose()
        return False
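
five_minute_medians collapses the noisy CO record to one median value per five-minute bin before plotting. A pure-Python sketch of the same idea (summit_core's implementation may differ):

import statistics as s
from datetime import timedelta

def five_minute_medians(dates, values):
    # sketch only: bin (date, value) pairs into 5-minute windows and return bin starts + medians
    bins = {}
    for date, value in zip(dates, values):
        bin_start = date - timedelta(minutes=date.minute % 5,
                                     seconds=date.second,
                                     microseconds=date.microsecond)
        bins.setdefault(bin_start, []).append(value)
    binned_dates = sorted(bins)
    return binned_dates, [s.median(bins[d]) for d in binned_dates]
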
Example #23
0
async def plot_new_data(logger):
    """
    If newer data exists, plot it going back one week from the day of the plotting.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        from summit_core import core_dir, Config
        from summit_core import methane_dir as rundir
        from summit_core import connect_to_db, create_daily_ticks, TempDir, Plot, add_or_ignore_plot
        from summit_methane import Sample, GcRun, Base, plottable_sample, summit_methane_plot

        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/plots'

    except ImportError as e:
        logger.error('ImportError occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        ch4_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not ch4_config:
            ch4_config = Config(
                processor=PROC
            )  # use all default values except processor on init
            core_session.add(ch4_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_new_data()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_new_data()')

        engine, session = connect_to_db('sqlite:///summit_methane.sqlite',
                                        rundir)

        runs_with_medians = (session.query(GcRun).filter(
            GcRun.median != None).filter(GcRun.standard_rsd < .02).filter(
                GcRun.rsd < .02).order_by(GcRun.date).all())

        last_ambient_date = runs_with_medians[-1].date
        # get date after filtering, ie don't plot if there's no new data getting plotted

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            ch4_config.days_to_plot)

        if last_ambient_date > ch4_config.last_data_date:

            ambient_dates = [run.date for run in runs_with_medians]
            ambient_mrs = [run.median for run in runs_with_medians]

            with TempDir(rundir / 'plots'):
                name = summit_methane_plot(
                    None,
                    {'Summit Methane [GC]': [ambient_dates, ambient_mrs]},
                    limits={
                        'bottom': 1850,
                        'top': 2050,
                        'right': date_limits.get('right', None),
                        'left': date_limits.get('left', None)
                    },
                    major_ticks=major_ticks,
                    minor_ticks=minor_ticks)

                methane_plot = Plot(rundir / 'plots' / name, remotedir,
                                    True)  # stage plots to be uploaded
                add_or_ignore_plot(methane_plot, core_session)

                ch4_config.last_data_date = last_ambient_date
                core_session.merge(ch4_config)

            logger.info('New data plots created.')
        else:
            logger.info('No new data found to be plotted.')

        session.close()
        engine.dispose()

        core_session.commit()
        core_session.close()
        core_engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_new_data()')
        send_processor_email(PROC, exception=e)
        core_session.close()
        core_engine.dispose()
        session.close()
        engine.dispose()
        return False
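
create_daily_ticks builds the shared x-axis window used by these plotting routines: date limits spanning the last days_to_plot days plus daily major ticks. A simplified sketch consistent with how it is called here (the minors_per_day default and exact return values are assumptions):

from datetime import datetime, timedelta

def create_daily_ticks(days_to_plot, minors_per_day=4):
    # sketch only: limits dict plus daily major ticks and sub-daily minor ticks
    end = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    start = end - timedelta(days=days_to_plot)
    date_limits = {'left': start, 'right': end}
    major_ticks = [start + timedelta(days=d) for d in range(days_to_plot + 1)]
    minor_ticks = [start + timedelta(days=d / minors_per_day)
                   for d in range(days_to_plot * minors_per_day + 1)]
    return date_limits, major_ticks, minor_ticks
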
Example #24
0
async def plot_dailies(logger):
    """
    Loads dailies for the last 3 weeks and plots with ticks for every three days and minor ticks for every day.
    Plots are registered with the core database so they're uploaded to the Taylor drive.

    :param logger: logger, to log events to
    :return: Boolean, True if it ran without error and created data, False if not
    """

    try:
        from pathlib import Path
        import datetime as dt
        from summit_core import connect_to_db, core_dir, TempDir, Config, Plot, add_or_ignore_plot, create_daily_ticks
        plotdir = core_dir / 'plots/daily'
        remotedir = r'/data/web/htdocs/instaar/groups/arl/summit/protected/plots'

        try:
            os.chdir(plotdir)
        except FileNotFoundError:
            os.makedirs(plotdir)  # create plots/daily and any missing parent directories

    except ImportError as e:
        logger.error(f'ImportError occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        return False

    try:
        engine, session = connect_to_db('sqlite:///summit_daily.sqlite',
                                        core_dir)
        Base.metadata.create_all(engine)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        core_engine, core_session = connect_to_db(
            'sqlite:///summit_core.sqlite', core_dir)
        Plot.__table__.create(core_engine, checkfirst=True)
        Config.__table__.create(core_engine, checkfirst=True)

        daily_config = core_session.query(Config).filter(
            Config.processor == PROC).one_or_none()

        if not daily_config:
            daily_config = Config(
                processor=PROC, days_to_plot=21
            )  # use default values except processor and days_to_plot
            core_session.add(daily_config)
            core_session.commit()

    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the core database in plot_dailies()'
        )
        send_processor_email(PROC, exception=e)
        return False

    try:
        logger.info('Running plot_dailies()')

        date_ago = datetime.now() - dt.timedelta(
            days=daily_config.days_to_plot +
            1)  # set a static for retrieving data at beginning of plot cycle

        date_limits, major_ticks, minor_ticks = create_daily_ticks(
            daily_config.days_to_plot, minors_per_day=1)

        major_ticks = [t for ind, t in enumerate(major_ticks)
                       if ind % 3 == 0]  # use every third daily tick

        dailies = session.query(Daily).filter(Daily.date >= date_ago).order_by(
            Daily.date).all()

        dailydict = {}
        for param in daily_parameters:
            dailydict[param] = [getattr(d, param) for d in dailies]

        with TempDir(plotdir):  ## PLOT daily housekeeping parameters: temperatures, pressures, voltages, and flows

            name = summit_daily_plot(dailydict.get('date'), ({
                'Ads Xfer A': [None, dailydict.get('ads_xfer_a')],
                'Ads Xfer B': [None, dailydict.get('ads_xfer_b')],
                'Valves Temp': [None, dailydict.get('valves_temp')],
                'GC Xfer Temp': [None, dailydict.get('gc_xfer_temp')],
                'Catalyst': [None, dailydict.get('catalyst')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 475
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            hot_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(hot_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'CJ1 Temp': [None, dailydict.get('cj1')],
                'CJ2 Temp': [None, dailydict.get('cj2')],
                'Standard Temp': [None, dailydict.get('std_temp')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 10,
                                         'top': 50
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            room_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(room_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'H2 Gen Pressure': [None, dailydict.get('h2_gen_p')],
                'Line Pressure': [None, dailydict.get('line_p')],
                'Zero Pressure': [None, dailydict.get('zero_p')],
                'FID Pressure': [None, dailydict.get('fid_p')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 75
                                     },
                                     y_label_str='Pressure (PSI)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            pressure_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(pressure_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'Inlet Short Temp': [None, dailydict.get('inlet_short')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 60
                                     },
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            inlet_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(inlet_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'Battery V': [None, dailydict.get('battv')],
                '12Va': [None, dailydict.get('v12a')],
                '15Va': [None, dailydict.get('v15a')],
                '15Vb': [None, dailydict.get('v15b')],
                '24V': [None, dailydict.get('v24')],
                '5Va': [None, dailydict.get('v5a')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': 0,
                                         'top': 30
                                     },
                                     y_label_str='Voltage (V)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            voltage_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(voltage_plot, core_session)

            name = summit_daily_plot(dailydict.get('date'), ({
                'MFC1': [None, dailydict.get('mfc1')],
                'MFC2': [None, dailydict.get('mfc2')],
                'MFC3a': [None, dailydict.get('mfc3a')],
                'MFC3b': [None, dailydict.get('mfc3b')],
                'MFC4': [None, dailydict.get('mfc4')],
                'MFC5': [None, dailydict.get('mfc5')]
            }),
                                     limits={
                                         'right':
                                         date_limits.get('right', None),
                                         'left': date_limits.get('left', None),
                                         'bottom': -1,
                                         'top': 3.5
                                     },
                                     y_label_str='Flow (mL/min)',
                                     major_ticks=major_ticks,
                                     minor_ticks=minor_ticks)

            flow_plot = Plot(plotdir / name, remotedir, True)
            add_or_ignore_plot(flow_plot, core_session)

        core_session.commit()
        core_session.close()
        core_engine.dispose()

        session.close()
        engine.dispose()
        return True

    except Exception as e:
        logger.error(f'Exception {e.args} occurred in plot_dailies()')
        send_processor_email(PROC, exception=e)
        session.close()
        engine.dispose()
        return False
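
Every figure above is wrapped in a Plot and handed to add_or_ignore_plot so repeated runs don't queue the same file for upload twice. A plausible sketch of that guard, assuming Plot stores its file location in a path column (actual column names may differ):

def add_or_ignore_plot(plot, core_session):
    # sketch only: add the plot unless one with the same path is already registered
    from summit_core import Plot  # assumed importable, as in the examples above
    existing_paths = [p[0] for p in core_session.query(Plot.path).all()]
    if str(plot.path) not in existing_paths:
        core_session.add(plot)
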
Example #25
0
from datetime import datetime
from summit_core import connect_to_db, voc_dir
from summit_voc import LogFile

engine, session = connect_to_db('sqlite:///summit_voc.sqlite', voc_dir)

logfiles = session.query(LogFile).filter(
    LogFile.date > datetime(2019, 3, 8)).all()

paramBounds = ({
    'samplepressure1': (1.5, 2.65),
    'samplepressure2': (6.5, 10),
    'GCHeadP': (5, 7.75),
    'GCHeadP1': (9, 13),
    'chamber_temp_start': (18, 30),
    'WT_primary_temp_start': (-35, -24),
    'WT_secondary_temp_start': (18, 35),
    'ads_secondary_temp_start': (18, 35),
    'ads_primary_temp_start': (-35, -24),
    'chamber_temp_end': (18, 30),
    'WT_primary_temp_end': (-35, -24),
    'WT_secondary_temp_end': (15, 35),
    'ads_secondary_temp_end': (15, 35),
    'ads_primary_temp_end': (-35, -24),
    'traptempFH': (-35, 0),
    'GCstarttemp': (35, 45),
    'traptempinject_end': (285, 310),
    'traptempbakeout_end': (310, 335),
    'WT_primary_hottemp': (75, 85),
    'WT_secondary_hottemp': (20, 35),
    'GCoventemp': (190, 210)