def get_list_of_files_in_range(
        tstart, tstop,
        target_dir=get_env_variable('STAGING_DIRECTORY')):
    """!!!Not Implemented!!!

    Get the list of files in `target_dir` that fall within the
    time range [tstart, tstop].

    :param tstart: start of the time range
    :param tstop: stop of the time range
    :param target_dir: directory to search for files, defaults to
        get_env_variable('STAGING_DIRECTORY')
    :type target_dir: str, optional
    """
    raise NotImplementedError
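# A minimal sketch of one possible implementation, under a hypothetical name
# so it does not shadow the stub above. Assumptions not confirmed by the
# stub: tstart/tstop are POSIX timestamps and files are selected by
# modification time.
def _example_get_list_of_files_in_range(tstart, tstop, target_dir=None):
    import glob
    import os
    # Resolve the directory lazily rather than at import time.
    target_dir = target_dir or get_env_variable('STAGING_DIRECTORY')
    return sorted(
        f for f in glob.glob(os.path.join(target_dir, '*'))
        if os.path.isfile(f) and tstart <= os.path.getmtime(f) <= tstop
    )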
def _create_archive_database():
    """ Create an empty archive.meta.info.db3 database if it doesn't exist.

    This file is responsible for tracking the ingest history/progress as
    well as the individual files that have been ingested.
    """
    db_filepath = os.path.join(TELEMETRY_ARCHIVE, 'archive.meta.info.db3')
    if not os.path.exists(db_filepath):
        with open(get_env_variable('JETA_ARCHIVE_DEFINITION_SOURCE'), 'r') as db_definition_file:
            db_definition_script = db_definition_file.read()
            print('Creating archive tracking database (sqlite3) {}'.format(db_filepath))
            db = sqlite3.connect(db_filepath)
            cur = db.cursor()
            cur.executescript(db_definition_script)
            cur.close()
            # Close the connection as well as the cursor.
            db.close()
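# Example usage (sketch): the function is idempotent, so it is safe to call
# at startup. Table names come from the SQL script referenced by
# JETA_ARCHIVE_DEFINITION_SOURCE and are not assumed here.
#
# _create_archive_database()
# db = sqlite3.connect(os.path.join(TELEMETRY_ARCHIVE, 'archive.meta.info.db3'))
# tables = db.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
# db.close()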
def get_total_archive_area_size(area="archive"):
    """Get the size of an archive area in bytes.

    :param area: the archive area for which to get the size, defaults to "archive"
    :type area: str, optional
    :return: size of the archive area in bytes
    :rtype: int
    """
    from pathlib import Path
    area_map = {
        # NOTE: the 'archive' path is hardcoded rather than derived from
        # the TELEMETRY_ARCHIVE environment variable.
        'archive': Path('/srv/telemetry/archive/data/tlm'),
        'staging': Path(get_env_variable('STAGING_DIRECTORY')),
    }
    root_directory = area_map[area]
    return sum(f.stat().st_size for f in root_directory.glob('**/*') if f.is_file())
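# Example usage (sketch): report an area's size in GiB.
#
# size_gib = get_total_archive_area_size(area='staging') / 1024 ** 3
# print(f'staging area size: {size_gib:.2f} GiB')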
def main():
    """ Update stats for the data archive based on opt parameters.

    This may be called in a loop by the program-level main().
    """
    logger.info('Runtime options: \n{}'.format(opt))
    logger.info('Update Module: {}'.format(os.path.abspath(__file__)))
    logger.info('Fetch Module: {}'.format(os.path.abspath(fetch.__file__)))

    # TODO: Write tests.
    # colnames = [x for x in pickle.load(open('msids in the archive file', 'rb'))
    #             if x not in fetch.IGNORE_COLNAMES]
    colnames = None
    ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')
    with h5py.File(ALL_KNOWN_MSID_METAFILE, 'r') as h5:
        colnames = list(h5.keys())

    if opt.update_stats:
        for colname in colnames:
            msid = statistics(colname, 'daily')
            statistics(colname, '5min', msid)
        logger.info('Stats updated.')
import unittest
from pathlib import Path

import pytest
from unittest.mock import patch

# jeta specific modules
from jeta.archive.utils import get_env_variable
from jeta.staging.manage import (
    get_staged_files_by_date,
    remove_activity,
    _format_activity_destination,
    _create_activity_staging_area
)

HOME = str(Path.home())
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')


class TestStaging(unittest.TestCase):

    def setUp(self) -> None:
        return super().setUp()

    def tearDown(self) -> None:
        return super().tearDown()

    def test_smoke_test(self):
        assert 1 == 1

    def test_can_import_manage(self):
        import importlib
        # Assumed completion (the original body is truncated here): verify
        # the module imports cleanly, matching the test's name.
        importlib.import_module('jeta.staging.manage')
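    # Sketch of an additional test, not part of the original suite. It is
    # grounded in jeta.staging.manage, where _format_activity_destination
    # returns f'{STAGING_DIRECTORY}{dst}/'.
    def test_format_activity_destination_appends_trailing_slash(self):
        dst = _format_activity_destination('my_activity')
        assert dst == f'{STAGING_DIRECTORY}my_activity/'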
import os

import h5py
import pickle
import sqlite3
import glob
import ntpath
import uuid

import pyyaks.logger
import pyyaks.context

import jeta.archive.file_defs as file_defs
from jeta.archive.utils import get_env_variable

# NOTE: reads the TELEMETRY_ARCHIVE variable; other modules map ENG_ARCHIVE
# to the ENG_ARCHIVE variable.
ENG_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')

# The backlog is a special activity that holds ingest files that should
# still be ingested but whose processing has fallen behind, i.e. it can
# contain any number of files covering any time range.
BACKLOG_DIRECTORY = f'{STAGING_DIRECTORY}backlog/'


def _format_activity_destination(dst):
    return f'{STAGING_DIRECTORY}{dst}/'


def _create_activity_staging_area(name, description=""):
    _activity = f"{STAGING_DIRECTORY}{name}"
    if os.path.exists(_activity):
        # Assumed completion (the original body is truncated here): treat an
        # existing directory as already staged and return its path.
        return _activity
    os.makedirs(_activity)
    return _activity
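# Example usage (sketch): stage a new activity and compute its destination
# path. The activity name here is hypothetical.
#
# activity_dir = _create_activity_staging_area('FSW_BUILD_42')
# dst = _format_activity_destination('FSW_BUILD_42')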
import os
import pickle
import sqlite3

import numpy as np
import h5py
import tables

# import pyyaks.logger
# import pyyaks.context

from astropy.time import Time

import jeta.archive.file_defs as file_defs
from jeta.archive.utils import get_env_variable

ENG_ARCHIVE = get_env_variable('ENG_ARCHIVE')
TELEMETRY_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')

# JETA_LOGS = get_env_variable('JETA_LOGS')

# logger = pyyaks.logger.get_logger(
#     filename=f'{JETA_LOGS}/jeta.operations.log',
#     name='jeta_operations_logger',
#     level='INFO',
#     format="%(asctime)s %(message)s"
# )
# Frequency per day at which ingests run.
INGEST_CADENCE = 2

# An assumption about the average number of files per ingest.
# default: 60 files, each covering a ~24 min interval.
AVG_NUMBER_OF_FILES = 60

# The expected lifetime of the mission, in years.
MISSION_LIFE_IN_YEARS = 20

# The average maximum number of rows per file, per
# analysis performed by DMS.
MAX_ROWS_PER_FILE = 10_280_811

# Archive persistent storage locations on disk.
ENG_ARCHIVE = get_env_variable('ENG_ARCHIVE')
TELEMETRY_ARCHIVE = get_env_variable('TELEMETRY_ARCHIVE')
STAGING_DIRECTORY = get_env_variable('STAGING_DIRECTORY')
JETA_LOGS = get_env_variable('JETA_LOGS')
ALL_KNOWN_MSID_METAFILE = get_env_variable('ALL_KNOWN_MSID_METAFILE')

# Flags read directly from the environment (no default handling via
# get_env_variable).
BYPASS_GAP_CHECK = int(os.environ['JETA_BYPASS_GAP_CHECK'])
UPDATE_STATS = int(os.environ['JETA_UPDATE_STATS'])

# Number of files expected per year, used to predict archive space allocation.
FILES_IN_A_YEAR = (AVG_NUMBER_OF_FILES * INGEST_CADENCE) * 365

ft = fetch.ft

msid_files = pyyaks.context.ContextDict('update.msid_files', basedir=ENG_ARCHIVE)
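# Back-of-the-envelope capacity sketch built from the constants above. The
# 8-bytes-per-row figure is purely illustrative, not taken from the archive
# format.
#
# lifetime_files = FILES_IN_A_YEAR * MISSION_LIFE_IN_YEARS
# lifetime_rows = lifetime_files * MAX_ROWS_PER_FILE
# approx_bytes = lifetime_rows * 8  # assumed 8 bytes per row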