"""This program fetches the most recent publicly-available data
for the Indian River stream gauge. It plots the most recent 48 hours' worth of
data. It shows this data in the context of key historical slide events.

The goal of this visualization is to show how current conditions compare
with the kinds of conditions that have led to slides in the past.
"""

import utils.analysis_utils as a_utils
from utils import plot_utils

print("Analyzing current river data.")

current_data = a_utils.fetch_current_data(fresh=False)
readings = a_utils.process_xml_data(current_data)

recent_readings = a_utils.get_recent_readings(readings, 48)
critical_points = a_utils.get_critical_points(recent_readings)
plot_utils.plot_current_data_html(recent_readings)

# Make a data file of all hx IRReading objects, to make the rest of the
#   work easier. Probably needs to be two sets, with consistent
#   reading intervals.
# Make a data file of 1 week of readings before moment of Kramer Ave slide.
# Pull from this file to get kramer_readings.
# Get recent_kramer_readings.
# Plot recent_kramer_readings with lower alpha values.

# Repeat for Medvejie 9/2019 slide.
# Put this on a heroku app?
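
# A rough sketch of the data-file idea above (hypothetical, not part of the
#   original code; assumes each IRReading exposes dt_reading and a height
#   attribute, which this snippet only confirms for dt_reading):
# import json
# with open('other_output/hx_readings.json', 'w') as f:
#     json.dump(
#         [{'dt_reading': r.dt_reading.isoformat(), 'height': r.height}
#          for r in readings],
#         f, indent=4)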
Example #2
def analyze_all_data(rise_critical,
                     m_critical,
                     verbose=False,
                     all_results=[],
                     alpha_name=''):
    # DEV: This is an abuse of Python norms. All caps should be constants. :(
    #      The mutable default for all_results is similar: calls that rely on
    #      the default all share (and keep growing) one results list.
    a_utils.RISE_CRITICAL = rise_critical
    a_utils.M_CRITICAL = m_critical

    # Make sure to call the correct parsing function for the data file format.
    # Data analysis is data cleaning. :/
    # DEV: Should probably walk the ir_data_clean directory, instead of making
    #      this list manually.
    # DEV: Should generate a JSON file of IRReading objects, and not have to
    #      parse this data set.
    data_files = [
        'ir_data_clean/irva_utc_072014-022016_hx_format.txt',
        'ir_data_clean/irva_akdt_022016-102019_arch_format.txt',
    ]
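    # DEV: A possible directory walk for the note above (untested sketch;
    #      assumes the cleaned data files live directly in ir_data_clean/):
    # from pathlib import Path
    # data_files = sorted(
    #     str(p) for p in Path('ir_data_clean').glob('*_format.txt'))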

    # Get known slides.
    slides_file = 'known_slides/known_slides.json'
    known_slides = SlideEvent.load_slides(slides_file)

    # Track overall stats.
    #   How many notifications followed by slides?
    #   How many notifications not followed by slides?
    #   How many slides were missed (no notification)?
    notifications_issued = 0
    associated_notifications = 0
    unassociated_notifications = 0
    unassociated_notification_points = []
    relevant_slides = []
    unassociated_slides = []
    notification_times = {}
    earliest_reading, latest_reading = None, None

    for data_file in data_files:
        # Use proper parsing function.
        if 'hx_format' in data_file:
            all_readings = ph.get_readings_hx_format(data_file)
        elif 'arch_format' in data_file:
            all_readings = ph.get_readings_arch_format(data_file)

        # Keep track of earliest and latest reading across all data files.
        if not earliest_reading:
            earliest_reading = all_readings[0]
            latest_reading = all_readings[-1]
        else:
            if all_readings[0].dt_reading < earliest_reading.dt_reading:
                earliest_reading = all_readings[0]
            if all_readings[-1].dt_reading > latest_reading.dt_reading:
                latest_reading = all_readings[-1]

        # Get all the known slides that occurred during these readings.
        slides_in_range = a_utils.get_slides_in_range(known_slides,
                                                      all_readings)

        # Find the start of all critical periods in this data file.
        first_critical_points = a_utils.get_first_critical_points(all_readings)
        for reading in first_critical_points:
            print(reading.get_formatted_reading())
        notifications_issued += len(first_critical_points)

        # reading_sets is a list of lists. Each list is a set of readings to
        #   plot or analyze, based around a first critical point.
        reading_sets = [
            a_utils.get_48hr_readings(fcp, all_readings)
            for fcp in first_critical_points
        ]

        for reading_set in reading_sets:
            critical_points = a_utils.get_critical_points(reading_set)
            relevant_slide = ph.get_relevant_slide(reading_set, known_slides)
            if relevant_slide:
                relevant_slides.append(relevant_slide)
                associated_notifications += 1
                notification_time = ph.get_notification_time(
                    critical_points, relevant_slide)
                notification_times[relevant_slide] = notification_time
                # Remove this slide from slides_in_range, so we'll
                #   be left with unassociated slides.
                slides_in_range.remove(relevant_slide)
            else:
                # This may be an unassociated notification.
                unassociated_notification_points.append(critical_points[0])
                unassociated_notifications += 1

        # Any slides left in slides_in_range are unassociated.
        #   We can grab a 48-hr data set around each of these slides.
        #   (Note: slide_readings is gathered below but not used further in
        #   this function, and unassociated_slides is rebuilt from set
        #   arithmetic after this loop.)
        for slide in slides_in_range:
            # Get first reading after this slide, and base 48 hrs around that.
            for reading in all_readings:
                if reading.dt_reading > slide.dt_slide:
                    slide_readings = a_utils.get_48hr_readings(
                        reading, all_readings)
                    break
            unassociated_slides.append(slide)

    # Summarize results.
    assert (
        unassociated_notifications == len(unassociated_notification_points))
    unassociated_slides = set(known_slides) - set(relevant_slides)
    slides_outside_range = []
    for slide in known_slides:
        if ((slide.dt_slide < earliest_reading.dt_reading)
                or (slide.dt_slide > latest_reading.dt_reading)):
            unassociated_slides.remove(slide)
            slides_outside_range.append(slide)
    start_str = earliest_reading.dt_reading.strftime('%m/%d/%Y')
    end_str = latest_reading.dt_reading.strftime('%m/%d/%Y')
    if verbose:
        print("\n\n --- Final Results ---\n")
        print(f"Data analyzed from: {start_str} to {end_str}")
        print(f"  Critical rise used: {a_utils.RISE_CRITICAL} feet")
        print(f"  Critical rise rate used: {a_utils.M_CRITICAL} ft/hr")

        print(f"\nNotifications Issued: {notifications_issued}")
        print(f"\nTrue Positives: {associated_notifications}")
        for slide in relevant_slides:
            print(
                f"  {slide.name} - Notification time: {notification_times[slide]} minutes"
            )
        print(f"\nFalse Positives: {unassociated_notifications}")
        for notification_point in unassociated_notification_points:
            print(
                f"  {notification_point.dt_reading.strftime('%m/%d/%Y %H:%M:%S')}"
            )

        print(f"\nFalse Negatives: {len(unassociated_slides)}")
        for slide in unassociated_slides:
            print(f"  {slide.name}")
        print(f"\nSlides outside range: {len(slides_outside_range)}")
        for slide in slides_outside_range:
            print(f"  {slide.name}")

    # Build results dict here, and add to file.
    results_dict = {
        'alpha name': alpha_name,
        'name': f"{a_utils.RISE_CRITICAL}_{a_utils.M_CRITICAL}",
        'critical rise': a_utils.RISE_CRITICAL,
        'critical slope': a_utils.M_CRITICAL,
        'true positives': associated_notifications,
        'false positives': unassociated_notifications,
        'false negatives': len(unassociated_slides),
        'notification times': list(notification_times.values()),
    }

    all_results.append(results_dict)

    # Write all_results to file for further analysis.
    filename = 'other_output/all_results.json'
    with open(filename, 'w') as f:
        json.dump(all_results, f, indent=4)
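
# Hypothetical usage sketch (not part of the original example): because
#   analyze_all_data() appends to a shared all_results list and rewrites
#   other_output/all_results.json on every call, it can be driven by a simple
#   parameter sweep. The threshold values and alpha_name format below are
#   placeholders only.
# for rise_critical in [2.0, 2.5, 3.0]:          # feet
#     for m_critical in [0.3, 0.5, 0.7]:         # ft/hr
#         alpha_name = f"sweep_{rise_critical}_{m_critical}"
#         analyze_all_data(rise_critical, m_critical, alpha_name=alpha_name)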
Example #3
def process_hx_data(root_output_directory=''):
    """Process all historical data in ir_data_clean/.

    - Get known slide events.
    - Get readings from file.
    - Pull interesting reading sets from readings. Analysis is done here.
    - Pickle reading sets.
    - Plot reading sets.
    - Summarize results.

    Does not return anything, but generates:
    - pkl files of reading sets.
    - html files containing interactive plots.
    - png files containing static plots.
    - console output summarizing what was found.
    """

    # Get known slides.
    slides_file = 'known_slides/known_slides.json'
    known_slides = SlideEvent.load_slides(slides_file)

    reading_sets = []

    if not args.use_cached_data:
        print("Parsing raw data files...")
        data_files = [
            'ir_data_clean/irva_utc_072014-022016_hx_format.txt',
            'ir_data_clean/irva_akdt_022016-102019_arch_format.txt',
        ]

        for data_file in data_files:
            readings = a_utils.get_readings_from_data_file(data_file)
            reading_sets += a_utils.get_reading_sets(readings, known_slides,
                                                     stats)

        print("Pickling reading sets...")
        for reading_set in reading_sets:
            # Pickle reading sets for faster analysis and plotting later,
            #   and for use by other programs.
            a_utils.pickle_reading_set(reading_set, root_output_directory)

    elif args.use_cached_data:
        print("Reading data from pickled files...")
        pkl_file_path = 'other_output/'
        pkl_files = [
            f for f in listdir(pkl_file_path)
            if path.isfile(path.join(pkl_file_path, f))
            and Path(f).suffix == '.pkl'
        ]

        for pkl_file in pkl_files:
            filename = f"{pkl_file_path}{pkl_file}"
            with open(filename, 'rb') as f:
                reading_set = pickle.load(f)
                reading_sets.append(reading_set)

    if not args.no_interactive_plots:
        for reading_set in reading_sets:
            print("Generating interactive plots...")
            critical_points = a_utils.get_critical_points(reading_set)
            ph.plot_data(reading_set,
                         known_slides=known_slides,
                         critical_points=critical_points,
                         root_output_directory=root_output_directory)

    if not args.no_static_plots:
        for reading_set in reading_sets:
            print("Generating static plots...")
            critical_points = a_utils.get_critical_points(reading_set)
            ph.plot_data_static(reading_set,
                                known_slides=known_slides,
                                critical_points=critical_points,
                                root_output_directory=root_output_directory)

    if not args.use_cached_data:
        a_utils.summarize_results(known_slides, stats)
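
# Hypothetical module-level wiring (not shown in this excerpt): the function
#   above expects an `args` object with use_cached_data, no_interactive_plots,
#   and no_static_plots attributes, e.g. from argparse. The flag names below
#   are inferred from those attribute names only.
# import argparse
# parser = argparse.ArgumentParser()
# parser.add_argument('--use-cached-data', action='store_true')
# parser.add_argument('--no-interactive-plots', action='store_true')
# parser.add_argument('--no-static-plots', action='store_true')
# args = parser.parse_args()
#
# process_hx_data()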
Example #4
import os

# (readings, readings_per_hour, a_utils, and plot_utils_mpl are assumed to be
#   defined and imported earlier in the full script; they are not shown here.)

#  May take 48*4*20 sec to run???

# Get rid of any existing animation files.
os.system('rm -rf animation_frames')
os.system('mkdir animation_frames')

# Loop over a set of readings, and send successive sets of readings
#  and numbered filenames to pcfme()
first_index = 0
while first_index < len(readings) - 48*readings_per_hour+1:
    # ffmpeg will use images in alphabetical order, so zero-pad frame numbers.
    alph_frame_str = f"{first_index:04}"
    frame_filename = f"animation_frames/animation_frame_{alph_frame_str}.png"
    end_index = first_index + 48*readings_per_hour
    frame_readings = readings[first_index:end_index]
    critical_points = a_utils.get_critical_points(frame_readings)

    plot_utils_mpl.plot_critical_forecast_mpl_extended(
            frame_readings,
            critical_points,
            filename=frame_filename)

    first_index += 1

    # if first_index > 10:
    #     break
if readings_per_hour == 4:
    framerate = 5
elif readings_per_hour == 1:
    framerate = 2
else:
    # Fallback (arbitrary choice) so framerate is always defined below.
    framerate = 2

os.system(
    f"cd animation_frames && ffmpeg -framerate {framerate}"
    " -pattern_type glob -i '*.png' -c:v libx264 -pix_fmt yuv420p"
    " animation_file_out.mp4")