Пример #1
0
def group(calibrated_path, location_path, out_path):
    """
    Link calibrated data files, plus the location files, into the output path.

    Path elements 3 through 8 of every crawled file are read as source type,
    year, month, day, source ID and data type. Everything that follows the
    data type is carried over unchanged beneath the target.

    :param calibrated_path: The input path for calibrated files.
    :type calibrated_path: str
    :param location_path: The input path for location files.
    :type location_path: str
    :param out_path: The output path for writing grouped files.
    :type out_path: str
    :return:
    """
    for position, file_path in enumerate(file_crawler.crawl(calibrated_path)):
        parts = file_path.parts
        source_type, year, month, day = parts[3], parts[4], parts[5], parts[6]
        source_id, data_type = parts[7], parts[8]
        log.debug(f'year: {year}  month: {month}  day: {day}')
        log.debug(
            f'source type: {source_type} source_id: {source_id} data type: {data_type}'
        )
        target_root = os.path.join(out_path, source_type, year, month, day,
                                   source_id)
        # The location files are linked once only, beneath the first file's root.
        if position == 0:
            link_location(location_path, target_root)
        # Keep every directory and file name that follows the data type.
        trailing_parts = parts[9:]
        target = os.path.join(target_root, data_type, *trailing_parts)
        log.debug(f'target: {target}')
        file_linker.link(file_path, target)
def link(paths, group_names, out_path):
    """
    Link each file into the output directory once per location context group,
    placing the group name in the path ahead of the location.

    :param paths: File paths to link. Each item provides a 'file_path' entry
        and a 'path_parts' entry holding the parsed path elements.
    :type paths: list
    :param group_names: A List of associated location context group names.
    :type group_names: list
    :param out_path: The output directory for writing.
    :type out_path: str
    :return:
    """
    for path in paths:
        # Pull the source file and its parsed path elements.
        file_path = path.get('file_path')
        parts = path.get('path_parts')
        source_type = parts.get("source_type")
        year = parts.get("year")
        month = parts.get("month")
        day = parts.get("day")
        location = parts.get("location")
        data_type = parts.get("data_type")
        remainder = parts.get("remainder")
        log.debug(f't: {source_type} Y: {year} M: {month} D: {day} '
                  f'loc: {location} type: {data_type} remainder: {remainder}')
        for group_name in group_names:
            target_dir = os.path.join(out_path, source_type, year, month, day,
                                      group_name, location, data_type)
            # exist_ok removes the window between an existence check and the
            # creation in which another process could create the directory.
            os.makedirs(target_dir, exist_ok=True)
            destination = os.path.join(target_dir, *remainder)
            log.debug(f'source: {file_path} destination: {destination}')
            file_linker.link(file_path, destination)
def group(data_path, location_path, out_path):
    """
    Link event data files and the location files into the output path.

    Each crawled path is trimmed with ``target_path.trim_path`` and its first
    six elements are read as source type, year, month, day, source ID and
    file name. The location files are linked beneath every file's target
    root; the data file goes into that root's 'data' directory.

    :param data_path: The path to the data files.
    :type data_path: str
    :param location_path: The path to the location file.
    :type location_path: str
    :param out_path: The path for writing results.
    :type out_path: str
    :return:
    """
    for file_path in file_crawler.crawl(data_path):
        trimmed_path = target_path.trim_path(file_path)
        log.debug(f'trimmed_path: {trimmed_path}')
        parts = trimmed_path.parts
        source_type, year, month, day = parts[0], parts[1], parts[2], parts[3]
        source_id, filename = parts[4], parts[5]
        log.debug(f'filename: {filename}')
        log.debug(f'source type: {source_type} source_id: {source_id}')
        target_root = os.path.join(out_path, source_type, year, month, day, source_id)
        link_location(location_path, target_root)
        data_target_path = os.path.join(target_root, 'data', filename)
        log.debug(f'data_target_path: {data_target_path}')
        file_linker.link(file_path, data_target_path)
Пример #4
0
def group_related(path, out_path):
    """
    Link related data and location files into the output directory.

    The trimmed input path is read as source type, year, month, day, group,
    location and data type. The output path places the date and group ahead
    of the source type; any remaining path elements are kept as they are.

    :param path: Directory or file path.
    :type path: str
    :param out_path: The output path for related data.
    :type out_path: str
    """
    for file_path in file_crawler.crawl(path):
        parts = pathlib.Path(target_path.trim_path(file_path)).parts
        source_type, year, month, day = parts[0], parts[1], parts[2], parts[3]
        group_name, location, data_type = parts[4], parts[5], parts[6]
        trailing_parts = parts[7:]
        target = os.path.join(out_path, year, month, day, group_name,
                              source_type, location, data_type,
                              *trailing_parts)
        log.debug(f'File target: {target}')
        file_linker.link(file_path, target)
Пример #5
0
def link(paths, group_names, out_path):
    """
    Link each file into the output directory once per context group name.

    The destination is laid out as
    ``out_path/source_type/group_name/source_id/data_type/filename``.

    :param paths: The file paths. Each item provides a 'file_path' entry and
        a 'path_parts' entry holding the parsed path elements.
    :type paths: list
    :param group_names: The context group names for the location.
    :type group_names: list
    :param out_path: The output path for writing results.
    :type out_path: str
    :return:
    """
    for path in paths:
        file_path = path.get('file_path')
        parts = path.get('path_parts')

        source_type = parts.get("source_type")
        source_id = parts.get("source_id")
        data_type = parts.get("data_type")
        filename = parts.get("filename")

        for group_name in group_names:
            log.debug(f'source_type: {source_type} id: {source_id} data_type: {data_type} file: {filename}')
            target_dir = os.path.join(out_path, source_type, group_name, source_id, data_type)
            # exist_ok removes the window between an existence check and the
            # creation in which another process could create the directory.
            os.makedirs(target_dir, exist_ok=True)
            destination = os.path.join(target_dir, filename)

            log.debug(f'source: {file_path} destination: {destination}')
            file_linker.link(file_path, destination)
Пример #6
0
def get_data_files(data_path, out_path, start_date=None, end_date=None):
    """
    Link the data files that pass the date check and collect their path keys.

    Path elements 3 through 9 of every crawled file are read as source type,
    year, month, day, location name, data type and file name. One key of the
    form ``/source_type/year/month/day/location_name`` is appended per
    linked file, so the returned list may hold repeated keys.

    :param data_path: The path to the data file directory.
    :type data_path: str
    :param out_path: The path to write results.
    :type out_path: str
    :param start_date: The start date.
    :type start_date: datetime object
    :param end_date: The end date.
    :type end_date: datetime object
    :return: list of data files.
    """
    keys = []
    for file_path in file_crawler.crawl(data_path):
        parts = file_path.parts
        source_type, year, month, day = parts[3], parts[4], parts[5], parts[6]
        location_name, data_type, filename = parts[7], parts[8], parts[9]
        if check_date(year, month, day, start_date, end_date):
            destination = os.path.join(out_path, source_type, year, month,
                                       day, location_name, data_type, filename)
            file_linker.link(file_path, destination)
            key_elements = (source_type, year, month, day, location_name)
            keys.append('/' + '/'.join(key_elements))
    return keys
Пример #7
0
def group(data_path, location_path, out_path):
    """
    Link data files and the location files into the output path.

    Path elements 3 through 7 of every crawled file are read as source type,
    year, month, day and file name. The source ID comes from the file name
    through ``DataFilename``. The location files are linked beneath every
    file's target root; the data file goes into that root's 'data' directory.

    :param data_path: The path to the data files.
    :type data_path: str
    :param location_path: The path to the location files.
    :type location_path: str
    :param out_path: The output path to write grouped files.
    :type out_path: str
    :return:
    """
    for file_path in file_crawler.crawl(data_path):
        parts = file_path.parts
        source_type, year, month, day = parts[3], parts[4], parts[5], parts[6]
        filename = parts[7]
        log.debug(f'data filename: {filename}')
        source_id = DataFilename(filename).source_id()
        log.debug(f'source type: {source_type} source_id: {source_id}')
        log.debug(f'year: {year}  month: {month}  day: {day}')
        log.debug(f'filename: {filename}')
        target_root = os.path.join(out_path, source_type, year, month, day,
                                   source_id)
        link_location(location_path, target_root)
        data_target_path = os.path.join(target_root, 'data', filename)
        log.debug(f'data_target_path: {data_target_path}')
        file_linker.link(file_path, data_target_path)
Пример #8
0
def link_path(target_dir, empty_file_path, location_name, year, month, day):
    """
    Link the empty file into the target directory under a substituted name.

    The literal words 'location', 'year', 'month' and 'day' in the empty
    file's name are replaced, in that order, by the given values.

    :param target_dir: The target directory for writing files.
    :type target_dir: str
    :param empty_file_path: The source empty file path.
    :type empty_file_path: str
    :param location_name: The location name.
    :type location_name: str
    :param year: The file year.
    :type year: str
    :param month: The file month.
    :type month: str
    :param day: The file day.
    :type day: str
    :return:
    """
    # Order matters: each replacement runs on the result of the previous one.
    substitutions = (
        ('location', location_name),
        ('year', year),
        ('month', month),
        ('day', day),
    )
    file_name = pathlib.Path(empty_file_path).name
    for placeholder, value in substitutions:
        file_name = file_name.replace(placeholder, value)
    destination = os.path.join(target_dir, file_name)
    print(f'target_path: {destination}')
    file_linker.link(empty_file_path, destination)
def group_data(data_path, out_path):
    """
    Link data and event files into the output path.

    The trimmed path of every crawled file is read as year, month, day,
    group name, source type, location, data type and file name.

    :param data_path: The path to the data files.
    :type data_path: str
    :param out_path: The output path for writing results.
    :type out_path: str
    :return: The ``out_path/year/month/day/group_name`` root of the last file
        processed, or None when no file was crawled.
    """
    target_root = None
    for file_path in file_crawler.crawl(data_path):
        parts = target_path.trim_path(file_path).parts
        year, month, day, group_name = parts[0], parts[1], parts[2], parts[3]
        source_type, location = parts[4], parts[5]
        data_type, filename = parts[6], parts[7]
        target_root = os.path.join(out_path, year, month, day, group_name)
        destination = os.path.join(target_root, source_type, location,
                                   data_type, filename)
        file_linker.link(file_path, destination)
    return target_root
def group(regularized_dir, quality_dir, out_dir):
    """
    Link the regularized and quality files that share a key into the output
    directory.

    Both directories are loaded with ``load_files``. Only keys present in
    both results are linked, the regularized file first and then the quality
    file.

    :param regularized_dir: The path containing regularized files.
    :type regularized_dir: str
    :param quality_dir: The path containing quality files.
    :type quality_dir: str
    :param out_dir: The path for writing results.
    :type out_dir: str
    :return:
    """
    regularized_files = load_files(regularized_dir, out_dir)
    quality_files = load_files(quality_dir, out_dir)
    regularized_keys = set(regularized_files.keys())
    quality_keys = set(quality_files.keys())
    log.debug(f'regularized_keys: {regularized_keys}')
    log.debug(f'quality_keys: {quality_keys}')
    common = regularized_keys & quality_keys
    log.debug(f'common: {common}')
    for key in common:
        matched_pair = (regularized_files.get(key), quality_files.get(key))
        for file_paths in matched_pair:
            file_linker.link(file_paths.get('source'),
                             file_paths.get('destination'))
Пример #11
0
 def test_link(self):
     # Linking an existing source file must make the target appear as a file.
     # NOTE(review): self.fs is presumably a fake-filesystem fixture - confirm.
     existing_file = '/test/input/file.foo'
     linked_file = '/test/output/file.foo'
     self.fs.create_file(existing_file)
     # Preconditions: the source is present and the target is not.
     self.assertTrue(os.path.isfile(existing_file))
     self.assertFalse(os.path.isfile(linked_file))
     file_linker.link(existing_file, linked_file)
     self.assertTrue(os.path.isfile(linked_file))
def group(path, out_path):
    """
    Link every crawled file into the output directory.

    The destination of each file is whatever ``target_path.get_path``
    returns for the file and the output path.

    :param path: File or directory paths.
    :type path: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    for source_file in file_crawler.crawl(path):
        destination = target_path.get_path(source_file, out_path)
        log.debug(f'target: {destination}')
        file_linker.link(source_file, destination)
Пример #13
0
    def pad(self):
        """
        Pad the data for the window size.

        Crawls ``self.data_path``. A file whose sub-directory element is in
        ``self.sub_dirs_to_process`` is linked once for every date returned by
        ``padder_util.get_dates_in_padded_range`` for the file's own date and
        ``self.window_size``; all other files are linked once beneath
        ``self.out_path``. The dates linked per location are collected and
        handed to ``output_writer.write_manifests`` after the crawl.

        Any exception raised along the way is caught and logged, not re-raised.

        :return:
        """
        try:
            # location -> every date a file of that location was linked into
            manifests = {}
            # location -> manifest.txt path next to the file's own-date link
            manifest_file_names = {}
            for file_path in file_crawler.crawl(self.data_path):
                parts = pathlib.Path(file_path).parts
                year = parts[self.year_index]
                month = parts[self.month_index]
                day = parts[self.day_index]
                location = parts[self.location_index]
                sub_dir = parts[self.sub_dir_index]
                if sub_dir in self.sub_dirs_to_process:
                    # source path up to and including the location element
                    location_path = os.path.join(*parts[0:self.location_index + 1])
                    if location not in manifests:
                        manifests[location] = []
                    # get data date
                    date = datetime.date(int(year), int(month), int(day))
                    # get dates in padded range
                    dates_in_padded_range = padder_util.get_dates_in_padded_range(date, self.window_size)
                    # link file into each date in padded range
                    destination_parts = list(parts)
                    # overwrite the leading elements (index 0 is left alone)
                    # with the matching elements of the output directory
                    for index in range(1, len(self.out_dir_parts)):
                        destination_parts[index] = self.out_dir_parts[index]
                    for date_in_padded_range in dates_in_padded_range:
                        # month and day are zero-padded to two digits
                        destination_parts[self.year_index] = str(date_in_padded_range.year)
                        destination_parts[self.month_index] = str(date_in_padded_range.month).zfill(2)
                        destination_parts[self.day_index] = str(date_in_padded_range.day).zfill(2)
                        # generate destination path
                        destination_path = os.path.join(*destination_parts)
                        log.debug(f'source: {file_path}')
                        log.debug(f'destination: {destination_path}')
                        file_linker.link(file_path, destination_path)
                        manifests[location].append(date_in_padded_range)
                        if date_in_padded_range == date:
                            # construct manifest filename
                            manifest_path = os.path.dirname(destination_path)  # remove data filename
                            manifest_file_names[location] = os.path.join(manifest_path, 'manifest.txt')
                        # called once per padded date, with the source location path
                        output_writer.write_thresholds(location_path, destination_path)
                else:
                    # not a padded sub-directory: link once, dropping the first
                    # three path elements (the slice end is past the last index,
                    # so it takes everything from index 3 onward)
                    destination_path = os.path.join(self.out_path, *parts[3:len(parts) + 1])
                    file_linker.link(file_path, destination_path)
            output_writer.write_manifests(manifests, manifest_file_names)  # write manifest files
        except Exception:
            # log the line number from the traceback plus the full exception info
            exception_type, exception_obj, exception_tb = sys.exc_info()
            log.error("Exception at line " + str(exception_tb.tb_lineno) + ": " + str(sys.exc_info()))
Пример #14
0
def link_location(location_path, target_root):
    """
    Link every crawled location file into the 'location' directory beneath
    the target root, keeping its file name.

    :param location_path: The location file path.
    :type location_path: str
    :param target_root: The target directory to write the location file.
    :type target_root: str
    :return:
    """
    for location_file in file_crawler.crawl(location_path):
        file_name = pathlib.Path(location_file).name
        destination = os.path.join(target_root, 'location', file_name)
        file_linker.link(location_file, destination)
def link_location(location_path, target_root):
    """
    Link every crawled location file into the 'location' directory beneath
    the target root, keeping its file name and logging each destination.

    :param location_path: The location file path.
    :type location_path: str
    :param target_root: The target directory path.
    :type target_root: str
    :return:
    """
    for location_file in file_crawler.crawl(location_path):
        file_name = pathlib.Path(location_file).name
        destination = os.path.join(target_root, 'location', file_name)
        log.debug(f'location_target_path: {destination}')
        file_linker.link(location_file, destination)
Пример #16
0
    def link_source(file_paths_by_type, out_path):
        """
        Link every file path held in the given mappings into the output
        directory, dropping the first three elements of each source path.

        :param file_paths_by_type: Mappings of data type to file path; the
            outer object is iterated and each item is read as a mapping.
        :type file_paths_by_type: list
        :param out_path: The output path.
        :type out_path: str
        """
        for paths_for_source in file_paths_by_type:
            for file_path in paths_for_source.values():
                kept_parts = pathlib.Path(file_path).parts[3:]
                destination = os.path.join(out_path, *kept_parts)
                log.debug(f'source: {file_path} destination: {destination}')
                file_linker.link(file_path, destination)
def join(pathname, out_path):
    """
    Join paths according to the given pathname and
    link all matching files into the output directory.

    Matches that already live beneath the output directory are skipped so
    that output is never linked back into itself.

    :param pathname: The path pattern to match.
    :type pathname: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    # Compare absolute paths. Testing the basename against out_path cannot
    # match, because a basename never contains a directory separator.
    # Joining with '' adds a trailing separator, so '/out' does not also
    # exclude '/output'.
    output_prefix = os.path.join(os.path.abspath(out_path), '')
    files = [fn for fn in glob.glob(pathname, recursive=True)
             if os.path.isfile(fn)
             and not os.path.abspath(fn).startswith(output_prefix)]
    for file in files:
        log.debug(f'found matching file: {file}')
        target = target_path.get_path(file, out_path)
        log.debug(f'target: {target}')
        file_linker.link(file, target)
def group(paths, out_path):
    """
    Link all files into the output directory.

    Each name in ``paths`` is looked up in the environment, the resulting
    directory is crawled, and every file found is linked to the location
    returned by ``target_path.get_path``.

    :param paths: Comma separated list of environment variable names whose values are full directory paths.
    :type paths: str
    :param out_path: The output path for writing results.
    :type out_path: str
    :raises KeyError: If a named environment variable is not set.
    """
    if isinstance(paths, str):
        # Always split. A single name without a comma must still become a
        # one-item list, otherwise the loop below walks its characters.
        names = [name.strip() for name in paths.split(',')]
        # Ignore blanks left by stray or trailing commas.
        names = [name for name in names if name]
    else:
        # An already-split sequence of names is used as given.
        names = paths
    log.debug(f'paths: {names}')
    for name in names:
        log.debug(f'path: {name}')
        path = os.environ[name]
        for file_path in file_crawler.crawl(path):
            target = target_path.get_path(file_path, out_path)
            log.debug(f'target: {target}')
            file_linker.link(file_path, target)
def filter_directory(in_path, filter_dirs, out_path):
    """
    Link the selected directories found beneath the input path into the
    output directory.

    A directory is linked when its name does not start with '.' and is one
    of the names returned by ``parse_dirs(filter_dirs)``.

    :param in_path: The input path.
    :type in_path: str
    :param filter_dirs: The directories to filter.
    :type filter_dirs: str
    :param out_path: The output path for writing results.
    :type out_path: str
    :return:
    """
    parsed_dirs = parse_dirs(filter_dirs)
    for walk_root, dir_names, _file_names in os.walk(in_path):
        for dir_name in dir_names:
            if dir_name.startswith('.') or dir_name not in parsed_dirs:
                continue
            source = os.path.join(walk_root, dir_name)
            file_linker.link(source, target_path.get_path(source, out_path))
Пример #20
0
def write_ancillary_data(out_dir, root):
    """
    Link the additional files found beside the threshold root into the
    output directory.

    The parent directory of ``root`` is crawled. A file is linked only when
    neither 'data' nor 'threshold' occurs anywhere in its path; the first
    three path elements are dropped when building the output path.

    :param out_dir: The output directory for writing results.
    :type out_dir: str
    :param root: The threshold root directory.
    :type root: str
    :return:
    """
    parent_dir = pathlib.Path(root).parent
    for crawled_path in file_crawler.crawl(parent_dir):
        source = str(crawled_path)
        # The check is a substring match against the whole path.
        if 'data' in source or 'threshold' in source:
            continue
        kept_parts = pathlib.Path(source).parts[3:]
        relative_path = os.path.join(*kept_parts)
        file_linker.link(source, os.path.join(out_dir, relative_path))
def write_thresholds(source_path, destination_path):
    """
    Link the threshold file, when present, into the output tree.

    The file is looked up at ``source_path/threshold/thresholds.json`` and
    linked to ``threshold/thresholds.json`` two levels above
    ``destination_path``. Nothing happens when the source file is missing.

    :param source_path: The threshold file path.
    :type source_path: str
    :param destination_path: The path to write the file.
    :type destination_path: str
    :return:
    """
    relative_threshold = ('threshold', 'thresholds.json')
    threshold_file = os.path.join(source_path, *relative_threshold)
    if not pathlib.Path(threshold_file).exists():
        return
    output_root = pathlib.Path(destination_path).parent.parent
    threshold_out = os.path.join(output_root, *relative_threshold)
    log.debug(f'Threshold file: {threshold_file}')
    log.debug(f'Threshold out: {threshold_out}')
    file_linker.link(threshold_file, threshold_out)
Пример #22
0
def write_thresholds(source_path, destination_path):
    """
    Link the threshold file, when present, into the output tree.

    The file is looked up at ``threshold/thresholds.json`` two levels above
    ``source_path`` and linked to the same relative location two levels
    above ``destination_path``. Nothing happens when the source is missing.

    :param source_path: The source path for the threshold file.
    :type source_path: str
    :param destination_path: The destination path to write results.
    :type destination_path: str
    :return:
    """
    relative_threshold = ('threshold', 'thresholds.json')
    source_root = pathlib.Path(source_path).parent.parent
    destination_root = pathlib.Path(destination_path).parent.parent
    source = os.path.join(source_root, *relative_threshold)
    if not os.path.exists(source):
        return
    destination = os.path.join(destination_root, *relative_threshold)
    log.debug(f'linking {source} to {destination}')
    file_linker.link(source, destination)
    def upload(self):
        """
        Link the source files into the output directory.

        Walks ``self.dataPath`` and skips files whose name starts with '.'.
        Each remaining file name is split on ``self.filenameDelimiter``; the
        date and location are taken from ``self.dateIndex`` and
        ``self.locIndex``. The file is linked to
        ``outPath/outputName/<date>/<loc>/<target filename>``, where the
        target filename joins the output name, date, location and the last
        two parts of the original name with the delimiter.

        Any exception is caught and logged, not re-raised.

        :return:
        """
        try:
            for root, dirs, files in os.walk(self.dataPath):
                for filename in files:
                    if filename.startswith('.'):
                        continue

                    source_path = os.path.join(root, filename)
                    filename_parts = filename.split(self.filenameDelimiter)
                    date_time = filename_parts[self.dateIndex]
                    loc = filename_parts[self.locIndex]

                    # Indices stay length-based on purpose: literal -2 would
                    # raise for a one-part name, where len - 2 wraps to -1.
                    part_count = len(filename_parts)
                    target_filename = self.filenameDelimiter.join([
                        self.outputName, date_time, loc,
                        filename_parts[part_count - 2],
                        filename_parts[part_count - 1],
                    ])
                    target_path_name = os.path.join(
                        self.outPath, self.outputName, date_time, loc,
                        target_filename)

                    print("sourcepath = " + source_path)
                    print("targetpath = " + target_path_name)
                    file_linker.link(source_path, target_path_name)

        except Exception:
            # Log the line number from the traceback plus the exception info.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            log.error("Exception at line " + str(exc_tb.tb_lineno) + ": " +
                      str(sys.exc_info()))
def group_events(event_path, target_root):
    """
    Link the event files that belong to the target's group into the target
    directory.

    The final element of ``target_root`` is the reference group. The trimmed
    path of every crawled file is read as source type, group name, source
    ID, data type and file name; only files whose group name equals the
    reference group are linked.

    :param event_path: The path to the event files.
    :type event_path: str
    :param target_root: The root output path.
    :type target_root: str
    :return:
    """
    reference_group = pathlib.Path(target_root).name
    for file_path in file_crawler.crawl(event_path):
        parts = pathlib.Path(target_path.trim_path(file_path)).parts
        source_type, group_name, source_id = parts[0], parts[1], parts[2]
        data_type, filename = parts[3], parts[4]
        event_target = os.path.join(target_root, source_type, source_id,
                                    data_type, filename)
        # The target is logged for every file, linked or not.
        log.debug(f'event_target: {event_target}')
        if group_name != reference_group:
            continue
        file_linker.link(file_path, event_target)
def process(data_path, out_path):
    """
    Link each crawled event file to a per-source output file.

    The trimmed path is read as source type, source ID and file name. The
    output file is ``out_path/source_type/source_id/
    <source_type>_<source_id>_events.json``. A file is linked only when that
    output path does not exist yet.

    :param data_path: The data path.
    :type data_path: str
    :param out_path: The output path for writing results.
    :type out_path: str
    :return:
    """
    for file_path in file_crawler.crawl(data_path):
        parts = target_path.trim_path(file_path).parts
        source_type, source_id, filename = parts[0], parts[1], parts[2]
        log.debug(f'source filename: {filename}')
        log.debug(f'source type: {source_type} source_id: {source_id}')
        output_filename = '_'.join([source_type, source_id, 'events.json'])
        output_path = os.path.join(out_path, source_type, source_id,
                                   output_filename)
        log.debug(f'output_path: {output_path}')
        if os.path.exists(output_path):
            continue
        file_linker.link(file_path, output_path)
    def pad(self):
        """
        Pad the data to the calculated window size.

        Walks ``self.data_path`` and handles every non-hidden file whose
        sub-directory element is in ``self.sub_dirs_to_output``. For each
        date/location combination the minimum data rate is read from the
        first JSON file in the location directory and the maximum window
        size from the first JSON file in the threshold directory; both are
        cached for later files. The file is then linked into every date
        returned by ``padder_util.get_dates_in_padded_range`` for the pad
        size, and the dates linked per location are written as manifests
        after the walk.

        Any exception is caught and logged, not re-raised.

        :return:
        """
        try:
            # caches keyed by year + month + day + config location
            max_window_size_by_date_and_location = {}
            min_data_rate_by_date_and_location = {}
            # config location -> every date a file of that location was linked into
            manifests = {}
            # config location -> manifest.txt path next to the own-date link
            manifest_file_names = {}
            for root, dirs, files in os.walk(self.data_path):
                for filename in files:
                    # skip hidden files
                    if not filename.startswith('.'):
                        file_path = os.path.join(root, filename)
                        parts = pathlib.Path(file_path).parts
                        subdir = parts[self.sub_dir_index]
                        if subdir in self.sub_dirs_to_output:
                            year = parts[self.year_index]
                            month = parts[self.month_index]
                            day = parts[self.day_index]
                            config_location = parts[self.config_location_index]
                            date_location_key = year + month + day + config_location

                            # source path up to and including the config location
                            config_location_path = os.path.join(
                                *parts[0:self.config_location_index + 1])

                            if config_location not in manifests:
                                manifests[config_location] = []

                            # get min of all data rates (to ensure adequate window coverage)
                            if date_location_key not in min_data_rate_by_date_and_location:
                                location_path = os.path.join(
                                    config_location_path, 'location')
                                location_files = [
                                    f for f in os.listdir(location_path)
                                    if f.endswith('.json')
                                ]
                                # only the first listed JSON file is read
                                location_file = os.path.join(
                                    location_path, location_files[0])
                                min_data_rate_by_date_and_location[date_location_key] = \
                                    padder_util.get_min_data_rate(location_file)
                            data_rate = min_data_rate_by_date_and_location[
                                date_location_key]

                            # get max of all window sizes
                            if date_location_key not in max_window_size_by_date_and_location:
                                threshold_path = os.path.join(
                                    config_location_path, 'threshold')
                                threshold_files = [
                                    f for f in os.listdir(threshold_path)
                                    if f.endswith('.json')
                                ]
                                # only the first listed JSON file is read
                                threshold_file = os.path.join(
                                    threshold_path, threshold_files[0])
                                log.debug(f'thresholdFile: {threshold_file}')
                                max_window_size_by_date_and_location[date_location_key] = \
                                    padder_util.get_max_window_size(threshold_file, data_rate)
                            window_size = max_window_size_by_date_and_location[
                                date_location_key]

                            # get data date
                            date = datetime.date(int(year), int(month),
                                                 int(day))

                            # calculate pad size
                            pad_size = padder_util.calculate_pad_size(
                                window_size)

                            # get dates in padded range
                            dates_in_padded_range = padder_util.get_dates_in_padded_range(
                                date, pad_size)

                            # link file into each date in padded range
                            destination_parts = list(parts)
                            # overwrite the leading elements (index 0 is left
                            # alone) with those of the output directory
                            for idx in range(1, len(self.out_dir_parts)):
                                destination_parts[idx] = self.out_dir_parts[
                                    idx]
                            for dateInPaddedRange in dates_in_padded_range:
                                # month and day are zero-padded to two digits
                                destination_parts[self.year_index] = str(
                                    dateInPaddedRange.year)
                                destination_parts[self.month_index] = str(
                                    dateInPaddedRange.month).zfill(2)
                                destination_parts[self.day_index] = str(
                                    dateInPaddedRange.day).zfill(2)
                                # generate destination path
                                destination_path = os.path.join(
                                    *destination_parts)
                                log.debug(f'source: {file_path}')
                                log.debug(f'destination: {destination_path}')
                                file_linker.link(file_path, destination_path)
                                manifests[config_location].append(
                                    dateInPaddedRange)
                                if dateInPaddedRange == date:
                                    # construct manifest filename
                                    manifest_path = os.path.dirname(
                                        destination_path
                                    )  # remove data file name
                                    manifest_file_names[
                                        config_location] = os.path.join(
                                            manifest_path, 'manifest.txt')
                                # called once per padded date
                                output_writer.write_thresholds(
                                    config_location_path, destination_path)
            output_writer.write_manifests(
                manifests, manifest_file_names)  # write manifest files

        except Exception:
            # log the line number from the traceback plus the exception info
            exc_type, exc_obj, exc_tb = sys.exc_info()
            log.error("Exception at line " + str(exc_tb.tb_lineno) + ": " +
                      str(sys.exc_info()))
Пример #27
0
def process_location_files(location_path,
                           keys,
                           out_path,
                           output_directories,
                           empty_data_path,
                           empty_flags_path,
                           empty_uncertainty_data_path,
                           start_date=None,
                           end_date=None):
    """
    Link location files into the output path and add placeholders where data is missing.

    Every file found under the location path is linked into a 'location' directory
    and a 'calibration' directory is created next to it. When the key built from the
    file's source type, date and named location is not in the given keys, the
    requested output directories are filled from the empty file paths.
    Files rejected by check_date are skipped.

    :param location_path: The path to the location file.
    :type location_path: str
    :param keys: The path keys to the data files.
    :type keys: list
    :param out_path: The path to write results.
    :type out_path: str
    :param output_directories: The output directories to write.
    :type output_directories: list
    :param empty_data_path: Path to the empty data files.
    :type empty_data_path: str
    :param empty_flags_path: Path to the empty flag files.
    :type empty_flags_path: str
    :param empty_uncertainty_data_path: Path to the empty uncertainty data file.
    :type empty_uncertainty_data_path: str
    :param start_date: The start date.
    :type start_date: datetime object
    :param end_date: The end date.
    :type end_date: datetime object
    :return:
    """
    # Output directories that are populated from a source of empty files.
    empty_sources = {
        'data': empty_data_path,
        'flags': empty_flags_path,
        'uncertainty_data': empty_uncertainty_data_path,
    }
    for file_path in file_crawler.crawl(location_path):
        parts = file_path.parts
        # Path elements 3 through 8 carry the source type, date, location and file name.
        (source_type, year, month, day,
         named_location_name, filename) = [parts[i] for i in range(3, 9)]
        if not check_date(year, month, day, start_date, end_date):
            continue
        location_elements = (source_type, year, month, day,
                             named_location_name)
        target_root = os.path.join(out_path, *location_elements)
        # Link the location file into the output directory.
        file_linker.link(file_path,
                         os.path.join(target_root, 'location', filename))
        # Ensure the calibration directory exists; existing content is left alone.
        os.makedirs(os.path.join(target_root, 'calibration'), exist_ok=True)
        # Build the key used to look up data for this sensor and date.
        key = '/' + '/'.join(location_elements)
        if key in keys:
            continue
        # No data for this key, create the empty directories and files.
        print(f'Key not found {key}')
        for directory in output_directories:
            target_dir = os.path.join(target_root, directory)
            if directory in empty_sources:
                link_path(target_dir, empty_sources[directory],
                          named_location_name, year, month, day)
            elif directory == 'uncertainty_coef':
                os.makedirs(target_dir, exist_ok=True)
Пример #28
0
def analyze(data_dir, out_dir):
    """
    Analyze time series data to calculate additional time padding required for processing with thresholds.

    Walks the data directory looking for directories that contain a 'manifest.txt'.
    Each manifest line is treated as a date that must be contained in the date
    reported by MergedDataFilename for at least one other file in the same directory.
    When every manifest date is accounted for, the data files are linked into the
    output directory, thresholds are written for each of them and the ancillary
    data is written once for the directory.

    Any exception raised while processing is logged and suppressed.

    :param data_dir: The data directory.
    :type data_dir: str
    :param out_dir: The output directory.
    :type out_dir: str
    :return:
    """
    out_dir_parts = list(pathlib.Path(out_dir).parts)
    manifest_file = 'manifest.txt'
    try:
        for root, _dirs, files in os.walk(data_dir):
            if manifest_file not in files:
                continue
            # read manifest; the context manager closes the file handle promptly
            with open(os.path.join(root, manifest_file)) as manifest:
                dates = [line.rstrip() for line in manifest]
            # list the directory once rather than once per manifest date
            directory_entries = os.listdir(root)
            # check for existence of complete manifest
            dates_not_found = list(dates)
            for date in dates:
                for data_file in directory_entries:
                    log.debug(f'data_file: {data_file}')
                    if data_file == manifest_file:
                        continue
                    data_file_date = MergedDataFilename(data_file).date()
                    log.debug(
                        f'checking data file date: {data_file_date} and '
                        f'manifest date {date} in {dates_not_found}'
                    )
                    if date in data_file_date and date in dates_not_found:
                        log.debug(f'found data for: {date}')
                        dates_not_found.remove(date)
            if dates_not_found:
                # incomplete manifest, nothing is written for this directory
                continue
            # complete, symlink to output repository
            for data_file in directory_entries:
                # TODO: The root is 'data', need to go one directory up.
                if data_file == manifest_file:
                    continue
                source_path = os.path.join(root, data_file)
                destination_parts = list(pathlib.Path(source_path).parts)
                # Overwrite the leading path elements (after the first) with those of
                # the output directory; index assignment deliberately raises if the
                # source path is shorter than the output path.
                for index, part in enumerate(out_dir_parts[1:], start=1):
                    destination_parts[index] = part
                destination_path = os.path.join(*destination_parts)
                log.debug(f'linking {source_path} to {destination_path}')
                file_linker.link(source_path, destination_path)
                write_thresholds(source_path, destination_path)
            # Go up one directory and get any ancillary files to write.
            write_ancillary_data(out_dir, root)

    except Exception:
        exception_type, exception_obj, exception_tb = sys.exc_info()
        log.error("Exception at line " + str(exception_tb.tb_lineno) + ": " +
                  str(sys.exc_info()))