def setUp(self):
        """Map the real parquet fixtures into a fake input repository.

        Every name in ``self.data_files`` is expected to sit next to this test
        module; it is exposed in the fake filesystem under
        ``<in_path>/<metadata_path>/<source_id>/<file name>``.
        """
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        repo_root = os.path.join('/', 'repo')
        self.in_path = os.path.join(repo_root, 'inputs')
        self.out_path = os.path.join(repo_root, 'outputs')
        self.metadata_path = os.path.join('prt', '2019', '10', '02')

        self.expected_files = [
            'prt_6974_2019-10-02.parquet',
            'prt_6848_2019-10-02.parquet',
        ]

        self.data_files = [
            'GRSM_prt_6974_2019-10-02.parquet',
            'UNDE_prt_6848_2019-10-02.parquet',
            'WREF_prt_6848_2019-10-02.parquet'
        ]

        # Real data files are used so they can actually be converted.
        fixture_dir = os.path.dirname(__file__)
        fake_day_dir = os.path.join(self.in_path, self.metadata_path)
        for fixture_name in self.data_files:
            # The source ID is the third underscore-separated token.
            source_id = fixture_name.split('_')[2]
            self.fs.add_real_file(
                os.path.join(fixture_dir, fixture_name),
                target_path=os.path.join(fake_day_dir, source_id,
                                         fixture_name))
示例#2
0
    def setUp(self):
        """Create a fake input tree holding one data file and one location file.

        The data file is empty; the location file is the real
        'test-locations.json' found next to this test module.
        """
        log_config.configure('DEBUG')

        # The context group to find in the location file.
        # NOTE(review): the earlier comment said this should match the file
        # entry 'aspirated-single-224', but the value is 'aspirated-triple-'
        # -- confirm which is intended.
        self.group = 'aspirated-triple-'
        self.location = 'CFGLOC113507'

        self.setUpPyfakefs()

        self.out_path = os.path.join('/', 'outputs')
        self.in_path = os.path.join('/', 'inputs')
        self.metadata_path = os.path.join('dualfan', '2019', '05', '21')

        location_root = os.path.join(self.in_path, 'repo', self.metadata_path,
                                     self.location)

        self.fs.create_file(os.path.join(location_root, 'data', 'data.ext'))

        # Use the real location file so it can be parsed.
        real_locations = os.path.join(os.path.dirname(__file__),
                                      'test-locations.json')
        fake_locations = os.path.join(location_root, 'location',
                                      'locations.json')
        self.fs.add_real_file(real_locations, target_path=fake_locations)
    def setUp(self):
        """Create the merged-input fixture tree in a fake filesystem.

        Empty data, flags and uncertainty coefficient files are created, and
        the real 'test-locations.json' next to this module is mapped in as
        the location file.
        """
        log_config.configure('DEBUG')

        # File path indices (consecutive values 3 through 8).
        (self.source_type_index,
         self.year_index,
         self.month_index,
         self.day_index,
         self.source_id_index,
         self.data_type_index) = range(3, 9)

        self.setUpPyfakefs()

        self.out_path = os.path.join('/', 'outputs')
        self.metadata_path = os.path.join('prt', '2019', '05', '21', '00001')

        # The context to find in the location file.
        self.context = 'aspirated-triple'

        self.in_path = os.path.join('/', 'inputs')
        merged_root = os.path.join(self.in_path, 'merged', self.metadata_path)

        empty_fixtures = (
            ('data', 'data.ext'),
            ('flags', 'flags.ext'),
            ('uncertainty_coefficient', 'uncertaintyCoefficient.json'),
        )
        for directory, file_name in empty_fixtures:
            self.fs.create_file(
                os.path.join(merged_root, directory, file_name))

        # Use the real location file so it can be parsed.
        real_locations = os.path.join(os.path.dirname(__file__),
                                      'test-locations.json')
        fake_locations = os.path.join(merged_root, 'location',
                                      'locations.json')
        self.fs.add_real_file(real_locations, target_path=fake_locations)
    def setUp(self):
        """Create a single empty source data file in the fake filesystem.

        Also records the names, indices and paths the tests refer to, and
        prints the generated input path.
        """
        log_config.configure('DEBUG')

        self.location = 'CFGLOC112154'

        tmp_root = os.path.join('/', 'tmp')
        self.out_dir = os.path.join(tmp_root, 'outputs')
        self.input_root = os.path.join(tmp_root, 'inputs')

        # <source type>/<year>/<month>/<day>
        day_path = os.path.join('prt', '2019', '01', '03')
        self.input_data_dir = os.path.join(self.input_root, day_path)
        self.source_dir = os.path.join(day_path, self.location)

        self.out_name = 'outname'
        self.target_date = '2019-01-03'
        self.loc_index = 1
        self.date_index = 2

        self.setUpPyfakefs()

        self.data_dir = 'data'

        # Source file name and the name it is expected to have on output.
        self.source_file_name = 'prt_CFGLOC112154_2019-01-03_basicStats_030.ext'
        self.target_file_name = 'outname_2019-01-03_CFGLOC112154_basicStats_030.ext'

        data_path = os.path.join(self.input_root, self.source_dir,
                                 self.data_dir, self.source_file_name)
        self.fs.create_file(data_path)

        print('Egress test')
        print(f'input data_path: {data_path}')
示例#5
0
    def setUp(self):
        """Create three empty input files and record their expected output paths."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        self.out_path = os.path.join('/', 'outputs')
        self.in_dir = os.path.join('/', 'inputs')

        self.dir1, self.dir_2, self.dir_3 = 'dir1', 'dir2', 'dir3'
        self.file_name1 = 'dir1.ext'
        self.file_name2 = 'dir2.ext'
        self.file_name3 = 'dir3.ext'

        # Set required files in mock filesystem.
        input_parent = os.path.join(self.in_dir, 'dir')
        fixtures = (
            (self.dir1, self.file_name1),
            (self.dir_2, self.file_name2),
            (self.dir_3, self.file_name3),
        )
        for directory, file_name in fixtures:
            self.fs.create_file(
                os.path.join(input_parent, directory, file_name))

        # Output paths: <out_path>/<directory>/<file name>.
        self.out_path1 = os.path.join(self.out_path, self.dir1,
                                      self.file_name1)
        self.out_path2 = os.path.join(self.out_path, self.dir_2,
                                      self.file_name2)
        self.out_path3 = os.path.join(self.out_path, self.dir_3,
                                      self.file_name3)
示例#6
0
def main():
    """Read the gap-filler settings from the environment and run it.

    Reads DATA_PATH, LOCATION_PATH, EMPTY_FILES_PATH, OUTPUT_DIRECTORIES,
    OUT_PATH and LOG_LEVEL, then calls ``gap_filler.get_data_files`` and
    ``gap_filler.process_location_files``. The start and end dates are passed
    on only when ``get_date_constraints()`` returns something other than None.
    """
    config = environs.Env()
    data_path = config('DATA_PATH')
    location_path = config('LOCATION_PATH')
    empty_files_path = config('EMPTY_FILES_PATH')
    output_directories = config('OUTPUT_DIRECTORIES')
    out_path = config('OUT_PATH')
    log_config.configure(config('LOG_LEVEL'))

    # Directory names to output should be a comma separated string.
    # NOTE(review): a value without a comma stays a plain string rather than
    # becoming a one-element list -- confirm the callee expects that.
    if ',' in output_directories:
        output_directories = output_directories.split(',')

    # Empty file paths, in the positional order the gap filler takes them.
    empty_files_paths = get_empty_file_paths(empty_files_path)
    empty_data_path, empty_flags_path, empty_uncertainty_data_path = [
        empty_files_paths.get(key)
        for key in ('empty_data_path',
                    'empty_flags_path',
                    'empty_uncertainty_data_path')
    ]

    # Date keyword arguments are supplied only when constraints exist.
    date_constraints = get_date_constraints()
    date_kwargs = {}
    if date_constraints is not None:
        date_kwargs = {
            'start_date': date_constraints.get('start_date'),
            'end_date': date_constraints.get('end_date'),
        }

    keys = gap_filler.get_data_files(data_path, out_path, **date_kwargs)
    gap_filler.process_location_files(location_path, keys, out_path,
                                      output_directories,
                                      empty_data_path, empty_flags_path,
                                      empty_uncertainty_data_path,
                                      **date_kwargs)
示例#7
0
    def setUp(self):
        """Create one regularized and two quality flag files in the fake filesystem."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        self.in_path = os.path.join('/', 'inputs')
        self.out_path = os.path.join('/', 'outputs')
        self.regularized_path = os.path.join(self.in_path, 'regularized')
        self.quality_path = os.path.join(self.in_path, 'quality')

        # (root, day directory, file name): the regularized file, the quality
        # file, then a second quality file under the next day directory.
        flag_fixtures = (
            (self.regularized_path, '01',
             'prt_CFGLOC112154_2018-01-01_flagsCal.ext'),
            (self.quality_path, '01',
             'prt_CFGLOC112154_2018-01-01_plausibility.ext'),
            (self.quality_path, '02',
             'prt_CFGLOC112154_2018-01-01_plausibility.ext'),
        )
        for root, day, file_name in flag_fixtures:
            self.fs.create_file(
                os.path.join(root, 'prt', '2018', '01', day,
                             'CFGLOC112154', 'flags', file_name))
    def setUp(self):
        """Create calibrated input files, a location file and the output directory."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        inputs_root = os.path.join('/', 'inputs')
        self.calibrated_path = os.path.join(inputs_root, 'calibrated')
        self.location_path = os.path.join(inputs_root, 'location')
        self.out_path = os.path.join('/', 'outputs')
        self.metadata_path = os.path.join('prt', '2019', '05', '17', '00001')

        # Calibrated input files; the last one sits in an extra nested
        # directory.
        source_root = os.path.join(self.calibrated_path, self.metadata_path)
        calibrated_fixtures = (
            ('data', 'data.ext'),
            ('flags', 'flags.ext'),
            ('uncertainty', 'uncertainty.json'),
            ('test', 'test_dir', 'test.json'),
        )
        for relative_parts in calibrated_fixtures:
            self.fs.create_file(os.path.join(source_root, *relative_parts))

        # Location input file.
        self.fs.create_file(
            os.path.join(self.location_path, 'prt', '00001',
                         'locations.json'))

        # Output directory.
        self.fs.create_dir(self.out_path)
    def setUp(self):
        """Create a heater event file and map in the real location file.

        The event file is empty; the location file is the real
        'test-locations.json' found next to this test module.
        """
        log_config.configure('DEBUG')

        self.location = 'CFGLOC113507'

        # The context group to find in the test location file.
        # NOTE(review): the earlier comment referred to the entry
        # 'aspirated-single-224', but the value is 'aspirated-triple-'
        # -- confirm which is intended.
        self.group = 'aspirated-triple-'

        self.setUpPyfakefs()

        self.source_id = '00001'

        self.in_path = os.path.join('/', 'inputs')
        self.out_path = os.path.join('/', 'outputs')

        self.event_file = f'heater_{self.source_id}_events.json'
        self.location_file = f'heater_{self.source_id}_locations.json'

        source_root = os.path.join(self.in_path, 'repo', 'heater',
                                   self.source_id)

        self.fs.create_file(
            os.path.join(source_root, 'data', self.event_file))

        # Use the real location file so it can be parsed.
        real_locations = os.path.join(os.path.dirname(__file__),
                                      'test-locations.json')
        fake_locations = os.path.join(source_root, 'location',
                                      self.location_file)
        self.fs.add_real_file(real_locations, target_path=fake_locations)
    def setUp(self):
        """Create empty calibration XML files and one data file in the fake filesystem."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        self.data_metadata_path = os.path.join('prt', '2019', '07', '23', '0001')
        self.calibration_metadata_path = os.path.join('prt', '0001')
        self.out_path = os.path.join('/', 'outputs')

        self.data_filename = 'prt_0001_2018-01-03.ext'

        # Roots of the two input repositories.
        inputs_root = os.path.join('/', 'inputs')
        self.data_path = os.path.join(inputs_root, 'data')
        self.calibration_path = os.path.join(inputs_root, 'calibration')

        # Calibration files: two per stream, resistance first.
        calibration_root = os.path.join(self.calibration_path,
                                        self.calibration_metadata_path)
        for stream in ('resistance', 'temperature'):
            for file_name in ('calibration1.xml', 'calibration2.xml'):
                self.fs.create_file(
                    os.path.join(calibration_root, stream, file_name))

        # Data file
        self.fs.create_file(
            os.path.join(self.data_path, self.data_metadata_path,
                         self.data_filename))
    def setUp(self):
        """Create one data file, one location file and the output directory."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        repo_root = os.path.join('/', 'repo')
        self.out_path = os.path.join(repo_root, 'outputs')
        self.data_path = os.path.join(repo_root, 'data')
        self.location_path = os.path.join('/', 'location')
        self.metadata_path = os.path.join('prt', '2019', '05', '17')

        self.data_file = 'prt_00001_2019-05-17.ext'
        self.location_file = 'prt_00001_locations.json'

        self.input_data_path = os.path.join(self.data_path,
                                            self.metadata_path,
                                            self.data_file)
        self.input_location_path = os.path.join(self.location_path, 'prt',
                                                '00001', self.location_file)

        # Create the data file first, then the location file.
        for empty_file in (self.input_data_path, self.input_location_path):
            self.fs.create_file(empty_file)

        # Create output directory.
        self.fs.create_dir(self.out_path)
    def setUp(self):
        """Create an empty data file and an empty location file under a group path."""
        log_config.configure('DEBUG')
        self.setUpPyfakefs()

        self.input_path = os.path.join('/', 'repo', 'inputs')
        self.output_path = os.path.join('/', 'outputs')

        self.group = 'aspirated-single-121'
        self.location = 'CFGLOC123'

        self.data_dir, self.data_file = 'data', 'data.ext'
        self.location_dir, self.location_file = 'location', 'locations.json'

        # The group name is the last component of the metadata path.
        self.metadata_path = os.path.join('2019', '05', '24', self.group)
        self.base_path = os.path.join(self.input_path, 'prt',
                                      self.metadata_path)

        location_root = os.path.join(self.base_path, self.location)
        self.in_data_path = os.path.join(location_root, self.data_dir,
                                         self.data_file)
        self.in_location_path = os.path.join(location_root,
                                             self.location_dir,
                                             self.location_file)

        for empty_file in (self.in_data_path, self.in_location_path):
            self.fs.create_file(empty_file)
def main():
    """Read SOURCE_PATH, OUT_PATH and LOG_LEVEL from the environment, then run ``process``."""
    config = environs.Env()
    source_path = config('SOURCE_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'source_path: {source_path} out_path: {out_path}')
    process(source_path, out_path)
def main():
    """Read OUT_PATH, DATABASE_URL and LOG_LEVEL from the environment, then run ``load``."""
    config = environs.Env()
    out_path = config('OUT_PATH')
    db_url = config('DATABASE_URL')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'Out path: {out_path}')
    load(db_url, out_path)
def main():
    """Read PATHNAME, OUT_PATH and LOG_LEVEL from the environment, then run ``join``."""
    config = environs.Env()
    pathname, out_path, log_level = (
        config('PATHNAME'),
        config('OUT_PATH'),
        config('LOG_LEVEL'),
    )
    log_config.configure(log_level)
    log.debug(f'pathname: {pathname}, log_level: {log_level}')
    join(pathname, out_path)
def main():
    """Group related paths without modifying the paths.

    Reads RELATED_PATHS, OUT_PATH and LOG_LEVEL from the environment and
    passes the first two to ``group``.
    """
    config = environs.Env()
    related_paths = config('RELATED_PATHS')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'related_paths: {related_paths} out_path: {out_path}')
    group(related_paths, out_path)
示例#17
0
def main():
    """Group data by related location group.

    Reads DATA_PATH, OUT_PATH and LOG_LEVEL from the environment and passes
    the first two to ``group_related``.
    """
    config = environs.Env()
    data_path = config('DATA_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'data_path: {data_path} out_path: {out_path}')
    group_related(data_path, out_path)
def main():
    """Group input data files without modifying the file paths.

    Reads DATA_PATH, OUT_PATH and LOG_LEVEL from the environment and passes
    the first two to ``group``.
    """
    config = environs.Env()
    data_path = config('DATA_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'data_path: {data_path} out_path: {out_path}')
    group(data_path, out_path)
def main():
    """Read the link-merge settings from the environment and run ``linkmerge``.

    IN_PATH and OUT_PATH are required strings. LOG_LEVEL defaults to 'INFO'
    and DEDUP_THRESHOLD to 0.3 when unset.
    """
    config = environs.Env()
    in_path = config.str('IN_PATH')
    out_path = config.str('OUT_PATH')
    log_level = config.str('LOG_LEVEL', default='INFO')
    # 30 percent duplication threshold for dedup by default
    dedup_threshold = config.float('DEDUP_THRESHOLD', default=0.3)
    log_config.configure(log_level)
    linkmerge(in_path, out_path, dedup_threshold)
def main():
    """Read DATA_PATH, LOCATION_PATH, OUT_PATH and LOG_LEVEL, then run ``group``."""
    config = environs.Env()
    data_path = config('DATA_PATH')
    location_path = config('LOCATION_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'data_dir: {data_path} location_dir: {location_path} out_dir: {out_path}')
    group(data_path, location_path, out_path)
def main():
    """Add the related location group name stored in the location file to the output path.

    Reads SOURCE_PATH, GROUP, OUT_PATH and LOG_LEVEL from the environment and
    passes the first three to ``process``.
    """
    config = environs.Env()
    source_path = config('SOURCE_PATH')
    group = config('GROUP')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(f'source_path: {source_path} group: {group} out_path: {out_path}')
    process(source_path, group, out_path)
示例#22
0
def main():
    """Read CALIBRATED_PATH, LOCATION_PATH, OUT_PATH and LOG_LEVEL, then run ``group``."""
    config = environs.Env()
    calibrated_path = config('CALIBRATED_PATH')
    location_path = config('LOCATION_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(
        f'calibrated_dir: {calibrated_path} '
        f'location_dir: {location_path} out_dir: {out_path}'
    )
    group(calibrated_path, location_path, out_path)
def main():
    """Group data, then group events under the root returned by the data step.

    Reads DATA_PATH, EVENT_PATH, OUT_PATH and LOG_LEVEL from the environment.
    """
    config = environs.Env()
    data_path = config('DATA_PATH')
    event_path = config('EVENT_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))
    log.debug(
        f'data_dir: {data_path} event_dir: {event_path} out_dir: {out_path}')
    # The value returned by group_data is the target for group_events.
    group_events(event_path, group_data(data_path, out_path))
def main():
    """Read IN_PATH, FILTER_DIR, OUT_PATH and LOG_LEVEL, then run ``filter_directory``."""
    config = environs.Env()
    in_path = config('IN_PATH')
    filter_dirs = config('FILTER_DIR')
    out_path = config('OUT_PATH')
    log_config.configure(config('LOG_LEVEL'))
    # The logger is obtained only after logging has been configured.
    logger = structlog.get_logger()
    logger.debug(
        f'in_path: {in_path} filter_dirs: {filter_dirs} out_dir: {out_path}')
    filter_directory(in_path, filter_dirs, out_path)
示例#25
0
def main():
    """Analyze padded time series data.

    Reads DATA_PATH, OUT_PATH and LOG_LEVEL from the environment and passes
    the first two to ``analyzer.analyze``.
    """
    config = environs.Env()
    data_path = config('DATA_PATH')
    out_path = config('OUT_PATH')
    log_config.configure(config('LOG_LEVEL'))
    # The logger is obtained only after logging has been configured.
    logger = get_logger()
    logger.debug(f'data_path: {data_path}')
    logger.debug(f'out_path: {out_path}')
    analyzer.analyze(data_path, out_path)
def main():
    """Read DATA_PATH, CALIBRATION_PATH, OUT_PATH and LOG_LEVEL, then run ``grouper.group``."""
    config = environs.Env()
    data_path = config('DATA_PATH')
    calibration_path = config('CALIBRATION_PATH')
    out_path = config('OUT_PATH')
    log_config.configure(config('LOG_LEVEL'))
    # The logger is obtained only after logging has been configured.
    logger = structlog.get_logger()
    logger.debug(f'data_path: {data_path}')
    logger.debug(f'calibration_path: {calibration_path}')
    logger.debug(f'out_path: {out_path}')
    grouper.group(data_path, calibration_path, out_path)
def main():
    """Group quality and calibration flags.

    Reads REGULARIZED_PATH, QUALITY_PATH, OUT_PATH and LOG_LEVEL from the
    environment and passes the first three to ``group``.
    """
    config = environs.Env()
    regularized_path = config('REGULARIZED_PATH')
    quality_path = config('QUALITY_PATH')
    out_path = config('OUT_PATH')
    # Logging is configured before the first log call.
    log_config.configure(config('LOG_LEVEL'))

    log.debug(
        f'regularized_path: {regularized_path} quality_path: {quality_path} out_path: {out_path}'
    )
    group(regularized_path, quality_path, out_path)
示例#28
0
def main():
    """Find thresholds over a database connection and write them to a file.

    Reads DATABASE_URL, OUT_PATH and LOG_LEVEL from the environment. The
    connection is wrapped in ``closing`` so it is closed when the block
    exits, including on error.
    """
    config = environs.Env()
    db_url = config('DATABASE_URL')
    out_path = config('OUT_PATH')
    log_level_name = config('LOG_LEVEL')

    log_config.configure(log_level_name)
    # The logger is obtained only after logging has been configured.
    logger = get_logger()

    logger.debug(f'URL: {db_url}')
    logger.debug(f'Out path: {out_path}')
    logger.debug(f'Log level: {log_level_name}')

    with closing(cx_Oracle.connect(db_url)) as connection:
        thresholds = threshold_finder.find_thresholds(connection)
        # The timestamp is taken after the thresholds have been found.
        write_file(thresholds, out_path,
                   date_formatter.convert(datetime.utcnow()))
    def setUp(self):
        """Map the real '.avro' data file into the fake input repository."""
        log_config.configure('DEBUG')

        self.setUpPyfakefs()

        repo_root = os.path.join('/', 'repo')
        self.in_path = os.path.join(repo_root, 'inputs')
        self.out_path = os.path.join(repo_root, 'outputs')

        self.metadata_path = os.path.join('prt', '2019', '01', '05', '767')
        self.data_filename = 'prt_767_2019-01-05.avro'

        # Use the real data file (located next to this module) to convert.
        self.real_path = os.path.join(os.path.dirname(__file__),
                                      self.data_filename)
        fake_path = os.path.join(self.in_path, self.metadata_path,
                                 self.data_filename)
        self.fs.add_real_file(self.real_path, target_path=fake_path)
 def setUp(self):
     """Prepare the fake filesystem and the fixture repositories.

     The data, location and empty-files repositories are built by helper
     methods (create_data_repo, create_location_repo,
     create_empty_files_repo) that are defined outside this view.
     """
     # Configure logging at DEBUG level.
     log_config.configure('DEBUG')
     # Location name (presumably consumed by the repo helpers below -- they
     # are not visible here, so confirm).
     self.location_name = 'SENSOR000000'
     # Initialize the fake file system; must happen before self.fs is used.
     self.setUpPyfakefs()
     # Create the output directory.
     self.out_path = os.path.join('/', 'outputs', 'repo')
     self.fs.create_dir(self.out_path)
     # Create the data repo.
     self.create_data_repo()
     # Create the location-by-date repo.
     self.create_location_repo()
     # Create the empty files repo.
     self.create_empty_files_repo()
     # Directory names to output, as a single comma separated string.
     self.output_directories = 'data,location,calibration,uncertainty_data,uncertainty_coef,flags'