示例#1
0
def get_list_of_files_in_range(tstart, tstop, target_dir=None):
    """!!!Not Implemented!!!

    Intended to return the staged files covering the interval
    [tstart, tstop].

    :param tstart: start of the time range (format not yet defined)
    :type tstart: [type]
    :param tstop: end of the time range (format not yet defined)
    :type tstop: [type]
    :param target_dir: directory to search; defaults to the
        STAGING_DIRECTORY environment variable. Resolved at call time
        (the original evaluated get_env_variable() once at import,
        which fails at import if the variable is unset and ignores
        later environment changes).
    :type target_dir: str, optional
    """
    if target_dir is None:
        target_dir = get_env_variable('STAGING_DIRECTORY')
    # TODO: implement file-range discovery
    pass
示例#2
0
def _create_archive_database():
    """ Create an empty archive.meta.info.db3 database if it doesn't exist

        This file is responsible for tracking the ingest history/progress
        as well as the individual files that have been ingested.
    """
    db_filepath = os.path.join(TELEMETRY_ARCHIVE,'archive.meta.info.db3')
    if not os.path.exists(db_filepath):
        # Read the DDL script first so the file handle is released before
        # any database work begins.
        with open(get_env_variable('JETA_ARCHIVE_DEFINITION_SOURCE'), 'r') as db_definition_file:
            db_definition_script = db_definition_file.read()
        print('Creating archive tracking database (sqlite3) {}'.format(db_filepath))
        db = sqlite3.connect(db_filepath)
        try:
            # Connection.executescript manages its own cursor; no explicit
            # cursor needed.
            db.executescript(db_definition_script)
            db.commit()
        finally:
            # The original leaked the connection (only the cursor was closed).
            db.close()
示例#3
0
def get_total_archive_area_size(area="archive"):
    """Get the size of a archive area in bytes

    :param area: the area in the archive to get for which to get the size, defaults to "archive"
    :type area: str, optional
    :return: size of the archive area in bytes
    :rtype: float
    """
    from pathlib import Path

    # Map each named area to its root directory on disk.
    locations = {
        'archive': Path('/srv/telemetry/archive/data/tlm'),
        'staging': Path(get_env_variable('STAGING_DIRECTORY')),
    }

    root = locations[area]

    # Walk the tree recursively and accumulate the size of every
    # regular file under the area root.
    total_bytes = 0
    for entry in root.glob('**/*'):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes
示例#4
0
def main():
    """ Update stats for the data archive based on opt parameters.

    This may be called in a loop by the program-level main().
    """

    logger.info('Runtime options: \n{}'.format(opt))
    logger.info('Update Module: {}'.format(os.path.abspath(__file__)))
    logger.info('Fetch Module: {}'.format(os.path.abspath(fetch.__file__)))

    # TODO: Write tests
    # colnames = [x for x in pickle.load('msids in the archive file', 'rb')) if x not in fetch.IGNORE_COLNAMES]

    # The set of MSIDs to process comes from the all-known-MSID metadata
    # HDF5 file: one top-level key per MSID.
    ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')
    with h5py.File(ALL_KNOWN_MSID_METAFILE, 'r') as h5:
        colnames = list(h5.keys())

    if opt.update_stats:
        for colname in colnames:
            # The daily pass returns an msid object that is fed into the
            # 5-minute pass — presumably to avoid re-reading the data;
            # confirm against statistics().
            msid = statistics(colname, 'daily')
            statistics(colname, '5min', msid)
        # Plain string: the original used an f-string with no placeholders.
        logger.info('Stats updated.')
示例#5
0
import unittest
# Fix: Path.home() is used below but pathlib was never imported,
# causing a NameError at import time.
from pathlib import Path
from unittest.mock import patch

import pytest

# jeta specific modules
from jeta.archive.utils import get_env_variable
from jeta.staging.manage import (
    get_staged_files_by_date,
    remove_activity,
    _format_activity_destination,
    _create_activity_staging_area
)

HOME = str(Path.home())
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')


class TestStaging(unittest.TestCase):
    """Unit tests for the jeta.staging.manage module."""

    def setUp(self) -> None:
        # No per-test fixtures needed yet; defer to unittest's default.
        return super().setUp()

    def tearDown(self) -> None:
        # No per-test cleanup needed yet; defer to unittest's default.
        return super().tearDown()

    def test_smoke_test(self):
        # Trivial sanity check that the test runner itself works.
        assert 1 == 1

    def test_can_import_manage(self):
        # NOTE(review): body appears to continue beyond this chunk;
        # presumably uses importlib to (re)import jeta.staging.manage — confirm.
        import importlib
示例#6
0
# Fix: `os` is used further down (os.path.exists) but was never imported.
import glob
import ntpath
import os
import pickle
import sqlite3
import uuid

import h5py
import pyyaks.context
import pyyaks.logger

import jeta.archive.file_defs as file_defs
from jeta.archive.utils import get_env_variable

# NOTE(review): ENG_ARCHIVE is loaded from the TELEMETRY_ARCHIVE variable
# here, while other modules load it from ENG_ARCHIVE — confirm intended.
ENG_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')

# The backlog is a special activity that hosts ingest files that should still be ingested
# but processing them is behind. i.e. can be any number of files for any range.
BACKLOG_DIRECTORY = f'{STAGING_DIRECTORY}backlog/'


def _format_activity_destination(dst):
    """Build the absolute staging path for activity *dst*, with a trailing slash."""
    return '{}{}/'.format(STAGING_DIRECTORY, dst)


def _create_activity_staging_area(name, description=""):

    _activity = f"{STAGING_DIRECTORY}{name}"
    if os.path.exists(_activity):
示例#7
0
import pickle
import sqlite3

import numpy as np
import h5py
import tables

# import pyyaks.logger
# import pyyaks.context

from astropy.time import Time

import jeta.archive.file_defs as file_defs
from jeta.archive.utils import get_env_variable

# Archive locations resolved from the environment at import time.
ENG_ARCHIVE = get_env_variable('ENG_ARCHIVE')
TELEMETRY_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')
# JETA_LOGS = get_env_variable('JETA_LOGS')


# NOTE(review): the module logger is disabled along with the JETA_LOGS
# variable above — re-enable both together if logging is restored.
# logger = pyyaks.logger.get_logger(
#     filename=f'{JETA_LOGS}/jeta.operations.log',
#     name='jeta_operations_logger',
#     level='INFO',
#     format="%(asctime)s %(message)s"
# )

def _create_archive_database():
    """ Create an empty archive.meta.info.db3 database if it doesn't exist
示例#8
0
# Frequency Per Day to ingest
INGEST_CADENCE = 2

# An assumption about the average number of files per ingest.
# default: 60 files covering an ~24mins interval each.
AVG_NUMBER_OF_FILES = 60

# The expected life time of the mission in years
MISSION_LIFE_IN_YEARS = 20

# The avg maximum number of rows per file per
# analysis performed by DMS.
MAX_ROWS_PER_FILE = 10_280_811

# Archive persistent storage locations on disk.
ENG_ARCHIVE = get_env_variable('ENG_ARCHIVE')
TELEMETRY_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')
JETA_LOGS = get_env_variable('JETA_LOGS')
ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')
# NOTE(review): direct os.environ[...] access raises KeyError when the
# variable is unset, unlike the get_env_variable() calls above — confirm
# the difference in failure behavior is intended.
BYPASS_GAP_CHECK = int(os.environ['JETA_BYPASS_GAP_CHECK'])
UPDATE_STATS = int(os.environ['JETA_UPDATE_STATS'])


# Calculate the number of files per year for archive space allocation prediction/allocation.
FILES_IN_A_YEAR = (AVG_NUMBER_OF_FILES * INGEST_CADENCE) * 365

# Shared file context object from the fetch module.
ft = fetch.ft

# Context dict of msid file paths, rooted at the archive base directory.
msid_files = pyyaks.context.ContextDict('update.msid_files',
                                        basedir=ENG_ARCHIVE)