def plot_new_data(logger):
    """
    Plots mixing ratio data, creating plot files and queueing the files for upload.

    This will plot data regardless of whether there is any new data, since it is not run continuously.

    :param logger: logging logger to record to
    :return: bool, True if it ran correctly, False if exit on error
    """
    logger.info('Running plot_new_data()')
    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_new_data()'
        )
        return False

    remotedir = BOULDAIR_BASE_PATH + '/MR_plots'

    compounds_to_plot = (session.query(Quantification.name).join(
        Standard, Quantification.standard_id == Standard.id).filter(
            Standard.name == 'quantlist').all())
    compounds_to_plot[:] = [q.name for q in compounds_to_plot]

    date_limits, major_ticks, minor_ticks = create_monthly_ticks(
        6, days_per_minor=7)

    with open(JSON_PUBLIC_DIR / 'zug_plot_info.json', 'r') as file:
        compound_limits = json.loads(file.read())

    for name in compounds_to_plot:
        params = (GcRun.date, Compound.mr)
        filters = (Compound.name == name, GcRun.date >= date_limits['left'],
                   *ambient_filters)

        results = abstract_query(params, filters, GcRun.date)

        dates = [r.date for r in results]
        mrs = [r.mr for r in results]

        p = MixingRatioPlot({name: (dates, mrs)},
                            limits={
                                **date_limits,
                                **compound_limits[name]
                            },
                            major_ticks=major_ticks,
                            minor_ticks=minor_ticks,
                            filepath=MR_PLOT_DIR / f'{name}_plot.png')

        p.plot()

        file_to_upload = FileToUpload(p.filepath, remotedir, staged=True)
        add_or_ignore_plot(file_to_upload, session)

    session.commit()
    session.close()
    engine.dispose()

    return True
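
# Hedged sketch: abstract_query() is a project helper used throughout these plotting functions; based
# solely on how it is called above, it presumably behaves roughly like the function below. The join
# condition between Compound and GcRun is an assumption, and the real helper may manage its session
# differently.
def _abstract_query_sketch(params, filters, order):
    engine, session = connect_to_db(DB_NAME, CORE_DIR)
    results = (session.query(*params)
               .select_from(Compound)
               .join(GcRun, Compound.run_id == GcRun.id)  # assumed relationship between the models
               .filter(*filters)
               .order_by(order)
               .all())
    session.close()
    engine.dispose()
    return results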
def load_all_logs(logger):
    """
    Process all logfiles in the log directory.

    Create LogFiles from all files in directory and check against database. Any new ones are processed in and committed.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running load_all_logs()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:
        print(f'Connecting to DB failed for reason {e.args}.')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    logfiles = sorted([Path(file) for file in os.scandir(LOG_DIR) if 'l.txt' in file.name])

    logs = []
    for file in logfiles:
        logs.append(LogFile(**read_log_file(file)))

    new_logfiles = filter_for_new_entities(logs, LogFile, 'date', session)

    for log in new_logfiles:
        session.add(log)
        logger.info(f'Log for {log.date} added.')

    session.commit()
    return True
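
# Hedged sketch of a filter_for_new_entities-style helper, inferred only from how it is called in
# these loaders (new objects, model class, attribute name, session); the project's real
# implementation may differ in detail.
def _filter_for_new_entities_sketch(entities, entity_type, attr, session):
    """Return only the entities whose attribute value is not already present in the database."""
    existing = {value for value, in session.query(getattr(entity_type, attr)).all()}
    return [e for e in entities if getattr(e, attr) not in existing]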
def load_all_integrations(logger):
    """
    Process all integration_results.txt files in GCMS directory.

    Create Integrations from all files in directory and check against database. Any new ones are processed in and
    committed.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running load_all_integrations()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:
        print(f'Connecting to DB failed for reason {e.args}.')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    all_results = sorted(get_all_data_files(GCMS_DIR, 'integration_results.txt'))

    integrations = []
    for file in all_results:
        integrations.append(Integration(**read_gcms_file(file)))

    new_integrations = filter_for_new_entities(integrations, Integration, 'date', session)

    for integration in new_integrations:
        session.add(integration)
        logger.info(f'Integration for {integration.date} added.')

    session.commit()
    return True
def run_JFJ_corrections(logger):
    """
    Runs once and not again unless the database is re-made. For data from 3/1/2018 to 12/20/2019, it will correct
    CFC-11, CFC-12, and CFC-113 by a constant factor for each compound to reflect sample differences discovered after we
    started taking two samples per day.
    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running run_JFJ_corrections()')
    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the database in run_JFJ_corrections()')
        return False

    config = session.query(Config).filter(Config.processor == 'JFJCorrection').one_or_none()
    if not config:
        config = Config(processor='JFJCorrection')
        config = session.merge(config)

    if config.last_data_date == datetime(1900, 1, 1):  # it's never been run before
        """
        Correct for CFC-11, CFC-12 and CFC-113
        """
        pass

        config.last_data_date = datetime.now()
        session.merge(config)

    session.commit()
    session.close()
    engine.dispose()
    return True
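
# Hedged sketch of what the stubbed correction above could look like: scale the mixing ratios of
# CFC-11, CFC-12, and CFC-113 by per-compound factors over the affected period. The factor values
# and the Compound/GcRun join condition are illustrative assumptions, not the real corrections.
def _jfj_correction_sketch(session):
    correction_factors = {'CFC-11': 1.0, 'CFC-12': 1.0, 'CFC-113': 1.0}  # hypothetical placeholder factors
    start, end = datetime(2018, 3, 1), datetime(2019, 12, 20)

    affected = (session.query(Compound)
                .join(GcRun, Compound.run_id == GcRun.id)  # assumed relationship between the models
                .filter(GcRun.date >= start, GcRun.date <= end,
                        Compound.name.in_(correction_factors.keys()))
                .all())

    for compound in affected:
        if compound.mr is not None:
            compound.mr = compound.mr * correction_factors[compound.name]
            session.merge(compound)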
def load_standards(logger):
    """
    Read standards.json and parse for any new standards.

    Reads standards.json and parses it into Standard objects that are then committed. New standards are added, but
    updated ones must be removed from the database first so they can be re-added as new. Standards that were once used
    as working standards have a start/end date attached to them, while others do not. Some, like 'quantlist', exist only
    to track all compounds that are quantified, and 'vocs' tracks which compounds are considered VOCs. An illustrative
    example of the expected standards.json layout is sketched after this function.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    standards_filepath = JSON_PRIVATE_DIR / 'standards.json'

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:
        print(f'Connecting to DB failed for reason {e.args}.')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    logger.info('Running load_standards()')

    standards_in_db = session.query(Standard.name).all()
    standards_in_db = {s.name for s in standards_in_db}

    standards = json.loads(standards_filepath.read_text())

    for name, vals in standards.items():
        start_date = vals.get('start_date')
        end_date = vals.get('end_date')

        if start_date:
            start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
        if end_date:
            end_date = datetime.strptime(end_date, '%Y-%m-%d %H:%M:%S')

        standard = Standard(name, start_date, end_date)
        quantifications = []
        for compound, cert_value in vals.items():
            if compound not in {'start_date', 'end_date'}:
                quantifications.append(Quantification(compound, cert_value, standard))

        if standard.name not in standards_in_db:
            session.merge(standard)  # cascades with merging all quantifications
            logger.info(f'Standard {standard.name} added.')

    session.commit()

    session.close()
    engine.dispose()

    return True
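
# Hedged example of the standards.json layout load_standards() assumes, inferred from the parsing
# above. The standard name, dates, compound names, and certified values below are purely illustrative.
EXAMPLE_STANDARDS_JSON = {
    'example_working_standard': {
        'start_date': '2018-03-01 00:00:00',  # optional; present only for standards used as working standards
        'end_date': '2019-12-20 00:00:00',
        'CFC-11': 245.2,                      # compound name -> certified value
        'CFC-12': 512.1
    },
    'quantlist': {
        'CFC-11': None,                       # entries like 'quantlist' only track which compounds are quantified
        'CFC-12': None
    }
}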
def load_all_dailies(logger):
    """
    Process all daily files in daily folder.

    Create DailyFile objects and compare to those in the database. Any new ones are processed and new Daily instances
    are committed.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running load_all_dailies()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:
        print(f'Connecting to DB failed for reason {e.args}.')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    daily_files_in_db = session.query(DailyFile).all()

    daily_files = [DailyFile(path) for path in sorted(get_all_data_files(DAILY_DIR, '.txt'))]

    # can't use filter_for_new_entities here because it requires additional checking of the file size for updating
    new_files = []
    for file in daily_files:
        file_in_db = search_for_attr_value(daily_files_in_db, 'path', file.path)

        if not file_in_db:
            new_files.append(file)
            logger.info(f'File {file.name} added for processing.')
        else:
            if file.size > file_in_db.size:
                logger.info(f'File {file_in_db.name} added to process additional data.')
                new_files.append(file_in_db)

    if new_files:
        for file in new_files:
            dailies = read_daily_file(file.path)
            dailies = filter_for_new_entities(dailies, Daily, 'date', session)
            file_daily_dates = [d.date for d in file.entries]
            file.entries.extend([d for d in dailies if d.date not in file_daily_dates])
            file.size = file.path.stat().st_size
            session.merge(file)
            logger.info(f'File {file.name} processed for daily data.')

        session.commit()

    session.close()
    engine.dispose()
    return True
def check_send_files(logger):
    """
    Sends all queued files to the Bouldair server for the website.

    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running check_send_files()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        con = connect_to_bouldair()
    except Exception as e:
        logger.error(
            f'Exception {e.args} prevented connection to the database in check_send_files()'
        )
        return False

    files_to_upload = session.query(FileToUpload).filter(
        FileToUpload.staged == True)

    remote_dirs = set([f.remote_path for f in files_to_upload.all()])

    for remote_dir in remote_dirs:
        file_set = files_to_upload.filter(
            FileToUpload.remote_path == remote_dir).all()

        if file_set:
            paths_to_upload = [p.path for p in file_set]
            successes = send_files_sftp(con, paths_to_upload, remote_dir)

            for file, success in zip(file_set, successes):
                if success:
                    logger.info(f'File {file.name} uploaded to website.')
                    session.delete(file)
                else:
                    logger.warning(f'File {file.name} failed to upload.')

    session.commit()

    session.close()
    engine.dispose()

    con.close()

    return True
def match_gcruns(logger):
    """
    Processes all unmarried Integrations and LogFiles, looking for matches with tolerances.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running match_gcruns()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:
        print(f'Connecting to DB failed for reason {e.args}.')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    integrations = session.query(Integration).filter(
        Integration.status == 'single').order_by(Integration.date).all()

    logfiles = session.query(LogFile).filter(
        LogFile.status == 'single').order_by(LogFile.date).all()

    runs = match_integrations_to_logs(integrations, logfiles)

    if runs:
        run_dates = {r.date for r in runs}
        run_dates_in_db = session.query(GcRun).filter(
            GcRun.date.in_(run_dates)).all()
        run_dates_in_db = {r.date for r in run_dates_in_db}

        for r in runs:
            if r.date not in run_dates_in_db:
                session.merge(r)
                run_dates_in_db.add(r.date)
                logger.info(f'GcRun for {r.date} added.')

        session.commit()

    session.close()
    engine.dispose()

    return True
def process_filters(logger):
    """
    Read any filter files and process them to filter any questionable data.

    Filters are kept in /filters, but only the subset in /filters/final and /filters/unprocessed is processed on each
    run through this function. First, ALL compounds in the data are flagged as unfiltered. This refresh prevents
    filtered data from persisting after it is removed from filter files, though it requires processing every file on
    each run. Next, all filter files are parsed and every point contained in them is flagged for specific compounds or
    'all'.

    Rules for filtering:
        - The files containing filter entries are authoritative. This function is called regularly and refreshes from
            the files, removing all filters and then re-adding them, so the files are the only way data can be
            filtered.
        - Scripts seeking to filter should append to the files or add new ones (see rule 1)
        - Filters are a large JSON dictionary: {'datestring': [compound1_to_filter, compound2_to_filter, ...]}; an
            illustrative example is sketched after this function
        - 'all' is an acceptable compound keyword - more may be added
        - Filters can overlap; e.g., if two files filter the same compound on the same date, that's okay.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """

    logger.info("Running process_filters()")

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:

        print(
            f'Connecting to DB failed for reason {e.args} in process_filters().'
        )
        print(f'The full traceback is {traceback.format_exc()}')
        return

    json_files = []
    for d in FILTER_DIRS:
        json_files.extend(
            [f for f in d.iterdir() if f.is_file() and f.suffix == '.json'])

    # TODO: Ideally, only process filters when one or more has been changed; but all must be processed, per below

    session.query(Compound).update({Compound.filtered: False})
    session.query(OldData).update({OldData.filtered: False})
    session.commit()  # un-filter ALL data prior to editing

    # this refresh is necessary because merely adding filters would leave data filtered in the database
    # even after its entry is removed from the JSON file

    for file in json_files:
        if not file.exists():
            logger.error(
                'JSON file could not be found for filtered data. Filter not processed'
            )
            continue

        filter_name = "/".join(file.parts[-2:])
        proc_name = f'Filter::{filter_name}'  # use filename and containing dir to prevent collisions

        config = session.query(Config).filter(
            Config.processor == proc_name).one_or_none()

        if not config:
            config = Config(processor=proc_name)  # accept all other defaults
            config = session.merge(config)

        logger.info(f'Filtered data for {filter_name} was modified or added.')

        with open(file, 'r') as f:
            filters = json.loads(f.read())

        for date, compound_list in filters.items():
            date = datetime.strptime(
                date[:16], '%Y-%m-%d %H:%M'
            )  # cut string to first 16 to remove extra text

            gc_run = (session.query(GcRun).filter(
                GcRun.date >= date,
                GcRun.date <= date + dt.timedelta(minutes=1)).one_or_none()
                      )  # search within one minute of date

            if gc_run:
                for compound in compound_list:
                    if compound == 'all':
                        for matched_compound in gc_run.compounds:
                            matched_compound.filtered = True
                            session.merge(matched_compound)
                    else:
                        matched_compound = gc_run.compound.get(compound)

                        if matched_compound:
                            matched_compound.filtered = True
                            session.merge(matched_compound)
                        else:
                            logger.warning(
                                f"Compound {compound} was filtered in the JSON file for GcRun with {date} "
                                + "but was not present in the GcRun.")
            else:

                old_run = (session.query(OldData).filter(
                    OldData.date >= date,
                    OldData.date <= date + dt.timedelta(minutes=1)).all())

                if old_run:
                    for compound in compound_list:
                        if compound == 'all':
                            for matched_compound in old_run:
                                matched_compound.filtered = True
                                session.merge(matched_compound)
                        else:
                            matched_compound = search_for_attr_value(
                                old_run, 'name', compound)

                            if matched_compound:
                                matched_compound.filtered = True
                                session.merge(matched_compound)
                            else:
                                logger.warning(
                                    f"Compound {compound} was filtered in the JSON file for OldData with {date} "
                                    + "but was not present in the GcRun.")
                else:
                    logger.warning(
                        f"GcRun with date {date} was not found in the old or new record, "
                        + "but was present in the JSON filter file.")

        session.merge(config)

        session.commit()

    # Clean Filters
    # Since filter files are loaded and put in the Config table, they must be checked at runtime to see if they
    # still exist. Non-existent ones will be removed from the db and warned in the console/logging
    all_filter_configs = (session.query(Config).filter(
        Config.processor.like('Filter::%')).all())

    for config in all_filter_configs:
        file = JSON_PRIVATE_DIR / f'filters/{config.processor.replace("Filter::", "")}'  # recreate filename
        if not file.exists():
            logger.warning(
                f'Filter file {"/".join(file.parts[-2:])} was not found so its config was removed.'
            )
            session.delete(config)
        session.commit()

    session.close()
    engine.dispose()

    return True
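
# Hedged example of a filter file process_filters() can parse, matching the format described in its
# docstring; the dates and compound names below are illustrative only. Only the first 16 characters
# of each date string ('%Y-%m-%d %H:%M') are read.
EXAMPLE_FILTER_JSON = {
    '2019-06-01 02:20': ['all'],                 # filter every compound in the GcRun at this date
    '2019-06-03 14:20': ['benzene', 'toluene']   # filter only specific compounds
}
# A dict like this would be written with json.dump() to a .json file in one of the FILTER_DIRS,
# e.g. filters/final/, to be picked up on the next pass of process_filters().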
def plot_history(logger):
    """
    Plot long-term plots containing data from 2013 onward.

    Queries the database to get all OldData as well as newer data processed by this system and plots them together.

    If OldData exists for a compound, it is combined with newer data and plotted from 2013 to the most recent data. One
    set of plots with a zeroed axis is created to show scale, as well as one with more appropriate bounds for viewing.

    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running plot_history()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_history()'
        )
        return False

    remotedir = BOULDAIR_BASE_PATH + '/full_plots'

    compounds_to_plot = (session.query(Quantification.name).join(
        Standard, Quantification.standard_id == Standard.id).filter(
            Standard.name == 'quantlist').all())
    compounds_to_plot[:] = [q.name for q in compounds_to_plot]

    date_limits, major_ticks, minor_ticks = create_monthly_ticks(
        84, days_per_minor=0)

    major_ticks = major_ticks[::6]

    with open(JSON_PUBLIC_DIR / 'zug_long_plot_info.json', 'r') as file:
        compound_limits = json.loads(file.read())

    for name in compounds_to_plot:
        old_results = (session.query(OldData.date, OldData.mr).filter(
            OldData.name == name).order_by(OldData.date).all())

        params = (GcRun.date, Compound.mr)
        filters = (Compound.name == name, GcRun.date >= date_limits['left'],
                   *ambient_filters)

        new_results = abstract_query(params, filters, GcRun.date)

        dates = [o.date for o in old_results] + [n.date for n in new_results]
        mrs = [o.mr for o in old_results] + [n.mr for n in new_results]

        limits = {**date_limits, **compound_limits[name]}

        # Create full plot w/ limits from file.
        fullplot = MixingRatioPlot({name: (dates, mrs)},
                                   limits=limits,
                                   major_ticks=major_ticks,
                                   minor_ticks=minor_ticks,
                                   filepath=FULL_PLOT_DIR / f'{name}_plot.png')

        fullplot.plot()

        file_to_upload = FileToUpload(fullplot.filepath,
                                      remotedir,
                                      staged=True)
        add_or_ignore_plot(file_to_upload, session)

        limits['bottom'] = 0

        # Create full plot w/ 0 limit for the bottom and top limit from file.
        fullplot_zeroed = MixingRatioPlot({name: (dates, mrs)},
                                          limits=limits,
                                          major_ticks=major_ticks,
                                          minor_ticks=minor_ticks,
                                          filepath=FULL_PLOT_DIR /
                                          f'{name}_plot_zeroed.png')

        fullplot_zeroed.plot()

        file_to_upload = FileToUpload(fullplot_zeroed.filepath,
                                      remotedir,
                                      staged=True)
        add_or_ignore_plot(file_to_upload, session)

    session.commit()
    session.close()
    engine.dispose()

    return True
def plot_standard_and_ambient_peak_areas(logger):
    """
    Plots peak area responses for both ambient samples and standard samples.

    Standard peak areas are plotted to show response over time, whereas ambient peak areas are used less often but are
    still useful on occasion. Plots are queued to be uploaded the next time a call to upload files is made.

    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running plot_standard_and_ambient_peak_areas()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_standard_and_ambient_peak_areas()'
        )
        return False

    date_limits, major_ticks, minor_ticks = create_monthly_ticks(
        18, days_per_minor=7)
    major_ticks[:] = [
        major for num, major in enumerate(major_ticks) if num % 2 == 0
    ]  # utilize only 1/2 of the majors

    remote_pa_dir = BOULDAIR_BASE_PATH + '/PA_plots'
    remote_std_dir = BOULDAIR_BASE_PATH + '/std_PA_plots'

    for compound in ALL_COMPOUNDS:
        # Plot Ambient Peak Areas

        params = (GcRun.date, Compound.pa)
        filters = (*ambient_filters, GcRun.date >= date_limits['left'],
                   Compound.name == compound)

        results = abstract_query(params, filters, GcRun.date)

        dates = [r.date for r in results]
        pas = [r.pa for r in results]

        pa_plot = PeakAreaPlot({compound: [dates, pas]},
                               limits=date_limits,
                               major_ticks=major_ticks,
                               minor_ticks=minor_ticks,
                               filepath=PA_PLOT_DIR /
                               f'{compound}_pa_plot.png')

        pa_plot.plot()
        file_to_upload = FileToUpload(pa_plot.filepath,
                                      remote_pa_dir,
                                      staged=True)
        add_or_ignore_plot(file_to_upload, session)

        filters = (GcRun.date >= date_limits['left'], GcRun.type.in_(
            (1, 2, 3)), Compound.name == compound)

        results = abstract_query(params, filters, GcRun.date)

        dates = [r.date for r in results]
        pas = [r.pa for r in results]

        std_pa_plot = StandardPeakAreaPlot({compound: [dates, pas]},
                                           limits=date_limits,
                                           major_ticks=major_ticks,
                                           minor_ticks=minor_ticks,
                                           filepath=STD_PA_PLOT_DIR /
                                           f'{compound}_plot.png')

        std_pa_plot.plot()
        file_to_upload = FileToUpload(std_pa_plot.filepath,
                                      remote_std_dir,
                                      staged=True)
        add_or_ignore_plot(file_to_upload, session)

    session.commit()
    session.close()
    engine.dispose()
    return True
def plot_dailydata(logger):
    """
    Plots data from the LabView logs as stored in Daily objects.

    Creates one set of plots of parameters logged by LabView on the half-hour. Files are queued to upload the next time
    a call to upload any files is made.

    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running plot_dailydata()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_dailydata()'
        )
        return False

    remotedir = BOULDAIR_BASE_PATH + '/dailyplots'

    date_limits, major_ticks, minor_ticks = create_daily_ticks(
        14, minors_per_day=2)
    major_ticks[:] = [
        major for num, major in enumerate(major_ticks) if num % 2 == 0
    ]  # utilize only 1/2 of the majors

    dailies = session.query(Daily).filter(
        Daily.date >= date_limits['left']).all()

    dates = [d.date for d in dailies]

    dailydict = {}
    for param in DAILY_ATTRS:
        dailydict[param] = [getattr(d, param) for d in dailies]

    all_daily_plots = []

    xfer_valve_ebox_plot = LogParameterPlot(
        {
            'Ads Xfer Temp': (dates, dailydict.get('ads_xfer_temp')),
            'Valves Temp': (dates, dailydict.get('valves_temp')),
            'GC Xfer Temp': (dates, dailydict.get('gc_xfer_temp')),
            'Ebox Temp': (dates, dailydict.get('ebox_temp'))
        },
        title='Zugspitze Daily Temperatures (Xfers, Valves, Ebox)',
        filepath=DAILY_PLOT_DIR / 'daily_xfer_valve_ebox_temps.png',
        limits={**date_limits},
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    xfer_valve_ebox_plot.plot()
    all_daily_plots.append(xfer_valve_ebox_plot)

    catalyst_temp_plot = LogParameterPlot(
        {'Catalyst Temp': (dates, dailydict.get('catalyst_temp'))},
        title='Zugspitze Daily Catalyst Temperature',
        filepath=DAILY_PLOT_DIR / 'daily_catalyst_temp.png',
        limits={
            **date_limits, 'bottom': 350,
            'top': 490
        },
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    catalyst_temp_plot.plot()
    all_daily_plots.append(catalyst_temp_plot)

    inlet_room_temp_plot = LogParameterPlot(
        {
            'Inlet Temp': (dates, dailydict.get('inlet_temp')),
            'Room Temp': (dates, dailydict.get('room_temp'))
        },
        title='Zugspitze Daily Temperatures (Inlet, Room)',
        filepath=DAILY_PLOT_DIR / 'daily_inlet_room_temp.png',
        limits={**date_limits},
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    inlet_room_temp_plot.plot()
    all_daily_plots.append(inlet_room_temp_plot)

    mfc_5v_plot = LogParameterPlot(
        {
            '5V (v)': (dates, dailydict.get('v5')),
            'MFC2': (dates, dailydict.get('mfc2')),
            'MFC3': (dates, dailydict.get('mfc3')),
            'MFC1': (dates, dailydict.get('mfc1'))
        },
        title='Zugspitze Daily 5V and MFC Readings',
        filepath=DAILY_PLOT_DIR / 'daily_5v_mfc.png',
        limits={**date_limits},
        y_label_str='',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    mfc_5v_plot.plot()
    all_daily_plots.append(mfc_5v_plot)

    line_zero_pressures_plot = LogParameterPlot(
        {
            'LineP': (dates, dailydict.get('linep')),
            'ZeroP': (dates, dailydict.get('zerop'))
        },
        title='Zugspitze Daily Pressures',
        filepath=DAILY_PLOT_DIR / 'daily_pressures.png',
        limits={**date_limits},
        y_label_str='Pressure (psi)',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    line_zero_pressures_plot.plot()
    all_daily_plots.append(line_zero_pressures_plot)

    for plot in all_daily_plots:
        file_to_upload = FileToUpload(plot.filepath, remotedir, staged=True)
        add_or_ignore_plot(file_to_upload, session)

    session.commit()
    session.close()
    engine.dispose()
    return True
def retrieve_new_files(logger):
    """
    Connect to an AWS Lightsail instance and compare local to remote files.

    Uses a local SQLite database as part of the project to track remote and local files, pulling only those with newer
    modification times. Only gets GCMS data for the current month and the month it was seven days ago (if different) as
    an optimization.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running retrieve_new_files()')
    engine, session = connect_to_db(DB_NAME, CORE_DIR)
    Base.metadata.create_all(engine)

    con = connect_to_lightsail()

    paths_to_check = ['daily', 'log']

    date = datetime.now()
    paths_to_check.append(
        f'GCMS/{date.year}_{date.month:02}'
    )  # check only this month's and last month's for speed

    month_ago = date - dt.timedelta(days=7)
    # go back one week in case it's been a few days and the last month still has data to retrieve

    if month_ago.month != date.month:  # if it's a different month, add that to the checklist
        paths_to_check.append(f'GCMS/{month_ago.year}_{month_ago.month:02}')

    for path in paths_to_check:
        logger.info(f'Processing {path} files.')
        local_path = CORE_DIR / f'data/{path}'
        remote_path = REMOTE_BASE_PATH + f'/{path}'

        all_remote_files = list_remote_files_recur(
            con, remote_path)  # get a list of all SFTPAttributes + paths

        all_local_files = [str(p) for p in list_files_recur(local_path)
                           ]  # get all local file paths

        new_remote_files = []
        for remote_file in all_remote_files:
            new_remote_files.append(
                RemoteFile(remote_file.st_mtime, remote_file.path))
        # create DB objects for all remote paths

        new_local_files = []
        for local_file in all_local_files:
            new_local_files.append(
                LocalFile(os.stat(local_file).st_mtime, local_file))
        # create DB objects for all local paths

        remote_sets = split_into_sets_of_n(
            [r.path for r in new_remote_files],
            750)  # don't exceed 1K sqlite var limit
        local_sets = split_into_sets_of_n([l.path for l in new_local_files],
                                          750)

        # loop through remote, then local filesets to check against DB and commit any new ones
        for Filetype, filesets, new_files in zip(
            [RemoteFile, LocalFile], [remote_sets, local_sets],
            [new_remote_files, new_local_files]):
            paths_in_db = []
            for set_ in filesets:
                set_ = frozenset(set_)
                # noinspection PyUnresolvedReferences
                in_db = session.query(Filetype.path).filter(
                    Filetype.path.in_(set_)).all()
                if in_db:
                    paths_in_db.extend([p.path for p in in_db])

            for file in new_files:
                if file.path in paths_in_db:
                    file_in_db = session.query(Filetype).filter(
                        Filetype.path == file.path).one_or_none()
                    if file.st_mtime > file_in_db.st_mtime:
                        file_in_db.st_mtime = file.st_mtime
                        session.merge(file_in_db)
                else:
                    session.add(file)
            session.commit()  # commit at the end of each filetype

        # local and remote files are now completely up-to-date in the database
        files_to_retrieve = []
        remote_files = session.query(RemoteFile).order_by(
            RemoteFile.relpath).all()
        local_files = session.query(LocalFile).order_by(
            LocalFile.relpath).all()

        for remote_file in remote_files:
            if remote_file.local is None:
                local_match = search_for_attr_value(local_files, 'relpath',
                                                    remote_file.relpath)
                if local_match:
                    remote_file.local = local_match
                    if remote_file.st_mtime > local_match.st_mtime:
                        files_to_retrieve.append(
                            remote_file
                        )  # add the remote file to download if st_mtime is greater
                else:
                    files_to_retrieve.append(
                        remote_file
                    )  # add the remote file if there's no local copy (create later)
            else:
                if remote_file.st_mtime > remote_file.local.st_mtime:
                    files_to_retrieve.append(remote_file)

        logger.info(f'Remote files: {len(remote_files)}')
        logger.info(f'Local files: {len(local_files)}')
        logger.info(
            f'{len(files_to_retrieve)} files need updating or retrieval.')

        ct = 0
        for remote_file in files_to_retrieve:
            if remote_file.local is not None:
                con.get(remote_file.path, remote_file.local.path
                        )  # get remote file and put in the local's path

                remote_file.local.st_mtime = remote_file.st_mtime  # update, then merge
                session.merge(remote_file)

                logger.info(f'Remote file {remote_file.relpath} was updated.')
                ct += 1
            else:
                new_local_path = LOCAL_BASE_PATH / remote_file.relpath.lstrip(
                    '/')

                scan_and_create_dir_tree(
                    new_local_path
                )  # scan the path and create any needed folders

                new_local_path = str(new_local_path)

                con.get(
                    remote_file.path,
                    new_local_path)  # get the file and put it in its relative place

                new_local = LocalFile(remote_file.st_mtime, new_local_path)
                new_local.remote = remote_file

                session.add(
                    new_local
                )  # create, relate, and add the local file that was transferred
                session.merge(remote_file)

                logger.info(
                    f'Remote file {remote_file.relpath} was retrieved and added to local database.'
                )
                ct += 1

            if not ct % 100:
                session.commit()  # routinely commit files in batches of 100
                logger.info(f'{ct} of {len(files_to_retrieve)} retrieved.')

        session.commit()

    con.close()
    session.close()
    engine.dispose()
    return True
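
# Hedged sketch of what a helper like split_into_sets_of_n presumably does, based on its use above
# to keep IN clauses under SQLite's bound-variable limit; the real helper may differ.
def _split_into_sets_of_n_sketch(items, n):
    """Yield successive chunks of at most n items from a sequence."""
    for start in range(0, len(items), n):
        yield items[start:start + n]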
def load_historic_data(logger):
    """
    Loads a modified file from Wei Wang with 2013 - 2017 mixing ratio data.

    Creates OldData objects by reading a provided spreadsheet and storing them in the database. The file is read once
    (when the Config has the default/unchanged date of (1900, 1, 1)), and will not be read again. Changes to the file
    can be processed by removing the Config 'HistoricData' from the database.

    :param logger: logging logger to record to
    :return: bool, True if it ran correctly, False if exit on error
    """

    logger.info('Running load_historic_data()')
    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(f'Error {e.args} prevented connecting to the database in load_historic_data()')
        return False

    config = session.query(Config).filter(Config.processor == 'HistoricData').one_or_none()
    if not config:
        config = Config(processor='HistoricData')
        config = session.merge(config)

    if config.last_data_date == datetime(1900, 1, 1):  # it's never been run before

        old_data = pd.read_excel(HISTORIC_DATA_SHEET, header=0, sheet_name='AmbientMixingRatio')
        old_data['Time stamp'] = old_data['Time stamp'].replace('--', np.nan)
        old_data['Time stamp'] = old_data['Time stamp'].str.lstrip()
        old_data.dropna(axis=0, subset=['Time stamp'], inplace=True)
        old_data.index = pd.to_datetime(old_data['Time stamp'])
        old_data.dropna(axis=1, how='all', inplace=True)

        compounds_to_plot = (session.query(Quantification.name)
                             .join(Standard, Quantification.standard_id == Standard.id)
                             .filter(Standard.name == 'quantlist').all())
        compounds_to_plot[:] = [q.name for q in compounds_to_plot]

        dates = [d.to_pydatetime() for d in old_data.index.to_list()]

        data = []
        for cpd in compounds_to_plot:
            try:
                compound_values = [c for c in old_data[cpd].values.tolist()]
            except KeyError:
                logger.warning(f'Compound {cpd} not found in historic data sheet.')
                continue

            for date, val in zip(dates, compound_values):
                data.append(OldData(cpd, date, val))

        if data:
            data_dates = {d.date for d in data}
            data_in_db = session.query(OldData.date).filter(OldData.date.in_(data_dates)).all()
            data_in_db[:] = [d.date for d in data_in_db]
            for datum in data:
                if datum.date not in data_in_db:
                    # this allows all compounds for a single date to load on the initial run, then blocks all from
                    # being loaded any subsequent times
                    session.add(datum)

        config.last_data_date = datetime.now()
        session.merge(config)

    session.commit()
    session.close()
    engine.dispose()
    return True
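
# Hedged example of how the one-time load above could be re-triggered, per its docstring: remove the
# 'HistoricData' Config row so last_data_date returns to its default and the sheet is re-read on the
# next call to load_historic_data().
def _reset_historic_data_config_sketch(session):
    config = session.query(Config).filter(Config.processor == 'HistoricData').one_or_none()
    if config:
        session.delete(config)
        session.commit()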
def quantify_runs(logger):
    """
    Iterates through unquantified GcRuns and attempts to quantify them.

    Queries and iterates through all unquantified GcRuns. If a working standard is found within six hours of that sample
    it will use that standard to quantify it.

    :param logger: Active logger that function should log to
    :return bool: True if it exits without issue/concern
    """
    logger.info('Running quantify_runs()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
        Base.metadata.create_all(engine)
    except Exception as e:

        print(
            f'Connecting to DB failed for reason {e.args} in quantify_runs().')
        print(f'The full traceback is {traceback.format_exc()}')
        return

    runs = session.query(GcRun).filter(GcRun.quantified == False).all()

    voc_list = (session.query(Quantification.name).join(
        Standard, Quantification.standard_id == Standard.id).filter(
            Standard.name == 'vocs').all())
    voc_list[:] = [q.name for q in voc_list]

    if not voc_list:
        logger.error('Could not load VOC list from Standards.')
        return False

    std = None  # no standard found yet

    for run in runs:
        # call blank subtract with no provided blank (it will find one if possible), and don't commit changes
        run.blank_subtract(session=session,
                           compounds_to_subtract=voc_list,
                           commit=False)

    # commit once after all runs are done for performance
    session.commit()

    runs = session.query(GcRun).filter(GcRun.quantified == False).all()

    for run in runs:
        # find the certified standard that applies to this time period
        if not std or not (std.start_date <= run.date < std.end_date):
            std = (session.query(Standard).filter(
                Standard.start_date <= run.date,
                Standard.end_date > run.date).one_or_none())
        if not std:
            continue
        else:
            run.standard = std

        # find the working standard if this run wasn't one
        if run.type not in {1, 2, 3}:
            close_standards = (session.query(GcRun).filter(
                GcRun.type.in_({1, 2, 3})).filter(
                    GcRun.date >= run.date - dt.timedelta(hours=6),
                    GcRun.date < run.date + dt.timedelta(hours=6)).all())

            match, delta = find_closest_date(run.date,
                                             [r.date for r in close_standards],
                                             how='abs')
            run.working_std = search_for_attr_value(close_standards, 'date',
                                                    match)

            if run.standard:
                run.quantify()
            else:
                logger.warning(
                    f'No Standard found for the GcRun at {run.date}')

            session.merge(run)

    session.commit()
    session.close()
    engine.dispose()
    return True
"""
This was created to allow creating blanket filters for specific time periods. Simply enter a start and end date and
a file containing every date between them will be generated and placed in "filters/final/".

compounds is specified as "all" so that all compounds are filtered by default, but it could be changed to, e.g.,
compounds = ["benzene", "toluene"] if only specific compounds should be filtered

Filters created with this script will be read in and applied on the next pass of zugspitze_runtime.py
"""
import json
from datetime import datetime

from IO.db.models import GcRun, OldData
from IO import connect_to_db
from settings import CORE_DIR, DB_NAME

engine, session = connect_to_db(DB_NAME, CORE_DIR)

filename = input('Filename for filter? (Do not include filetype) ')

start_date = input(
    'What date do you want to filter from? (inclusive mm/dd/yyyy HH:MM) ')
end_date = input(
    'What date do you want to filter to? (inclusive mm/dd/yyyy HH:MM) ')

start_date = datetime.strptime(start_date, '%m/%d/%Y %H:%M')
end_date = datetime.strptime(end_date, '%m/%d/%Y %H:%M')

compounds = ['H-2402']  # can be specific list of compounds

dates = (session.query(GcRun.date)
         .filter(GcRun.date >= start_date, GcRun.date <= end_date)  # assumed completion of the truncated original
         .order_by(GcRun.date)
         .all())
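
# Assumed remainder of this snippet (the original is truncated here): map every queried date to the
# chosen compounds and write the result to filters/final/<filename>.json as the docstring describes.
# The output location is an assumption; process_filters() only reads the first 16 characters of each
# date key, so str(date) is sufficient.
filter_dict = {str(date): compounds for date, in dates}

output_path = CORE_DIR / f'filters/final/{filename}.json'  # assumed location of final filter files

with open(output_path, 'w') as f:
    json.dump(filter_dict, f, indent=2)
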
def plot_logdata(logger):
    """
    Plots data from the LabView logs as stored in LogFile objects.

    Creates one set of plots of parameters logged by LabView with each run. Files are queued to upload the next time a
    call to upload any files is made.

    :param logger: logging logger to record to
    :return: bool, True if ran correctly, False if exit on error
    """
    logger.info('Running plot_logdata()')

    try:
        engine, session = connect_to_db(DB_NAME, CORE_DIR)
    except Exception as e:
        logger.error(
            f'Error {e.args} prevented connecting to the database in plot_logdata()'
        )
        return False

    remotedir = BOULDAIR_BASE_PATH + '/logplots'

    date_limits, major_ticks, minor_ticks = create_daily_ticks(
        14, minors_per_day=2)
    major_ticks[:] = [
        major for num, major in enumerate(major_ticks) if not num % 2
    ]  # utilize only 1/2 of the majors

    logs = session.query(LogFile).filter(
        LogFile.date >= date_limits.get('left')).all()

    logdict = {}
    for param in LOG_ATTRS:
        logdict[param] = [getattr(l, param) for l in logs]

    dates = [l.date for l in logs]

    all_plots = []

    sample_pressure_plot = LogParameterPlot(
        {
            'Sample Pressure Start': (dates, logdict.get('sample_p_start')),
            'Sample Flow (V)': (dates, logdict.get('sample_flow_act'))
        },
        title='Zugspitze Sample Pressures',
        filepath=LOG_PLOT_DIR / 'log_sample_pressure_flow.png',
        limits={
            **date_limits, 'bottom': 0,
            'top': 4
        },
        y_label_str='',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    sample_pressure_plot.plot()
    all_plots.append(sample_pressure_plot)

    sample_pressure_during_plot = LogParameterPlot(
        {'Sample Pressure During': (dates, logdict.get('sample_p_during'))},
        title='Zugspitze Sample Pressure During Run',
        filepath=LOG_PLOT_DIR / 'log_sample_pressure_during.png',
        limits={
            **date_limits, 'bottom': 4,
            'top': 12
        },
        y_label_str='Pressure (psi)',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    sample_pressure_during_plot.plot()
    all_plots.append(sample_pressure_during_plot)

    gchead_pressures_plot = LogParameterPlot(
        {
            'GC HeadP Start': (dates, logdict.get('gcheadp_start')),
            'GC HeadP During': (dates, logdict.get('gcheadp_during'))
        },
        title='Zugspitze GC Head Pressures',
        filepath=LOG_PLOT_DIR / 'log_gcheadp_pressures.png',
        limits={**date_limits},
        y_label_str='Pressure (psi)',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    gchead_pressures_plot.plot()
    all_plots.append(gchead_pressures_plot)

    active_wat_ads_traps_plot = LogParameterPlot(
        {
            'WT @ Sample Start': (dates, logdict.get('wt_sample_start')),
            'WT @ Sample End': (dates, logdict.get('wt_sample_end')),
            'Ads A @ Sample Start': (dates, logdict.get('ads_a_sample_start')),
            'Ads A @ Sample End': (dates, logdict.get('ads_a_sample_end'))
        },
        title='Zugspitze Active Ads and Water Trap Temperatures',
        filepath=LOG_PLOT_DIR / 'log_wat_ads_active_traptemps.png',
        limits={
            **date_limits, 'bottom': -55,
            'top': -30
        },
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    active_wat_ads_traps_plot.plot()
    all_plots.append(active_wat_ads_traps_plot)

    inactive_wat_ads_traps_plot = LogParameterPlot(
        {
            'Ads B @ Sample Start': (dates, logdict.get('ads_b_sample_start')),
            'Ads B @ Sample End': (dates, logdict.get('ads_b_sample_end'))
        },
        title='Zugspitze Inactive Ads Trap Temperatures',
        filepath=LOG_PLOT_DIR / 'log_ads_inactive_traptemps.png',
        limits={
            **date_limits, 'bottom': 15,
            'top': 35
        },
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    inactive_wat_ads_traps_plot.plot()
    all_plots.append(inactive_wat_ads_traps_plot)

    traps_temps_plot = LogParameterPlot(
        {
            'Trap @ FH': (dates, logdict.get('trap_temp_fh')),
            'Trap @ Inject': (dates, logdict.get('trap_temp_inject')),
            'Trap @ Bakeout': (dates, logdict.get('trap_temp_bakeout'))
        },
        title='Zugspitze Trap Temperatures',
        filepath=LOG_PLOT_DIR / 'log_trap_temps.png',
        limits={**date_limits},
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    traps_temps_plot.plot()
    all_plots.append(traps_temps_plot)

    battery_voltages_plot = LogParameterPlot(
        {
            'BattV @ Inject': (dates, logdict.get('battv_inject')),
            'BattV @ Bakeout': (dates, logdict.get('battv_bakeout'))
        },
        title='Zugspitze Battery Voltages',
        filepath=LOG_PLOT_DIR / 'log_battery_voltages.png',
        limits={
            **date_limits, 'bottom': 8,
            'top': 14
        },
        y_label_str='Voltage (V)',
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    battery_voltages_plot.plot()
    all_plots.append(battery_voltages_plot)

    gc_start_wt_temps_plot = LogParameterPlot(
        {
            'GC Start Temp': (dates, logdict.get('gc_start_temp')),
            'WT Hot Temp': (dates, logdict.get('wt_hot_temp'))
        },
        title='Zugspitze GC Start and WT Hot Temps',
        filepath=LOG_PLOT_DIR / 'log_gc_start_wthot_temps.png',
        limits={
            **date_limits, 'bottom': 0,
            'top': 75
        },
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    gc_start_wt_temps_plot.plot()
    all_plots.append(gc_start_wt_temps_plot)

    heat_outs_plot = LogParameterPlot(
        {
            'HeatOut @ FH': (dates, logdict.get('trapheatout_flashheat')),
            'HeatOut @ Inject': (dates, logdict.get('trapheatout_inject')),
            'HeatOut @ Bakeout': (dates, logdict.get('trapheatout_bakeout'))
        },
        title='Zugspitze Heat Outputs',
        filepath=LOG_PLOT_DIR / 'log_heat_outs.png',
        limits={**date_limits},
        major_ticks=major_ticks,
        minor_ticks=minor_ticks)

    heat_outs_plot.plot()
    all_plots.append(heat_outs_plot)

    oven_mfc_ramp_plot = TwoAxisLogParameterPlot(
        {'GC Oven Temp': (dates, logdict.get('gc_oven_temp'))},
        {'MFC1 Ramp': (dates, logdict.get('mfc1_ramp'))},
        title='Zugspitze Oven Temperature and MFC1 Ramp',
        filepath=LOG_PLOT_DIR / 'log_oven_mfc_ramp.png',
        limits_y1={
            'right': date_limits.get('right'),
            'left': date_limits.get('left'),
            'bottom': 180,
            'top': 230
        },
        limits_y2={
            'bottom': .65,
            'top': .85
        },
        major_ticks=major_ticks,
        minor_ticks=minor_ticks,
        y2_label_str='MFC1 Ramp',
        color_set_y2=('orange', ))

    oven_mfc_ramp_plot.plot()
    all_plots.append(oven_mfc_ramp_plot)

    for plot in all_plots:
        file_to_upload = FileToUpload(plot.filepath, remotedir, staged=True)
        add_or_ignore_plot(file_to_upload, session)

    session.commit()
    session.close()
    engine.dispose()

    return True