def check_labels(els_file_name, labels_file_name):
    """Check whether at least one labelled crossing in `labels_file_name`
    falls within the time span of the ELS data in `els_file_name`.

    Returns:
        True if at least one 'change_points' label lies within the data's
        time range; False if the labels file does not exist, the ELS data
        cannot be read (ValueError), or no label is in range.
    """
    # Check if the labels file exists.
    if not os.path.exists(labels_file_name):
        return False

    # Load labels.
    with open(labels_file_name, 'r') as labels_file_object:
        labels = yaml.safe_load(labels_file_object)
        crossings = labels['change_points']

    # Convert to float (unit days).
    crossing_floats = datestring_to_float(crossings)

    # Load the ELS time axis once. It does not depend on the crossing being
    # tested, so it is hoisted out of the loop -- the original reloaded the
    # entire data file for every candidate crossing.
    try:
        times = get_ELS_data(els_file_name, quantity='anode5',
                             start_time=datetime.min,
                             end_time=datetime.max)[2]
    except ValueError:
        # Data unreadable: no label can be validated (matches the original,
        # where every per-crossing attempt would have failed the same way).
        return False

    # Check if at least one label is valid (within the data's time span).
    return any(times[0] <= crossing_float <= times[-1]
               for crossing_float in crossing_floats)
def main(els_data_file, outputfile, quantity, start_time, end_time, colorbar_range, colorbar_orientation, title, interpolated, show_labels, **kwargs): # Check input arguments - data file should exist. if not os.path.exists(els_data_file): raise OSError('Could not find %s.' % els_data_file) # Create figure and axes. fig, ax = plt.subplots() # Check input arguments - start and end times should be valid. if start_time is not None: try: start_time = datetime.strptime(start_time, '%d-%m-%Y/%H:%M') except ValueError: raise else: start_time = datetime.min if end_time is not None: try: end_time = datetime.strptime(end_time, '%d-%m-%Y/%H:%M').replace(second=59, microsecond=999999) except ValueError: raise else: end_time = datetime.max # Pass all parameters and plot. if interpolated: plot_interpolated_ELS_data(fig, ax, els_data_file, quantity, start_time, end_time, colorbar_range, verbose=True, **kwargs) else: plot_raw_ELS_data(fig, ax, els_data_file, quantity, start_time, end_time, colorbar_range, colorbar_orientation, verbose=True, **kwargs) # Add title. if title is not None: ax.set_title(title) # Plot the events occurring in this file. if show_labels: from compute_labelled_events import list_of_events from data_utils import datestring_to_float labels = list_of_events(os.path.basename(os.path.splitext(els_data_file)[0]), './') # How large is the width of the rectangle around each labelled event? days_per_minute = 1/(24 * 60) window_size = 1*days_per_minute # Annotate plot with labelled events. print 'Labelled events:' for label_type, crossing_timestring in labels: print '- Event at %s of type %s.' % (crossing_timestring, label_type) crossing_time = datestring_to_float(crossing_timestring) ax.axvspan(crossing_time - window_size/2, crossing_time + window_size/2, facecolor=crossing_color(label_type), alpha=1) # Save to file if given. if outputfile is None: plt.show() else: plt.savefig(outputfile, bbox_inches='tight')
def load_anomalies(labels_file, params):
    """Load labelled anomalies of type `params.anomaly_type` from
    `labels_file` and return them as sorted floats (unit days), restricted
    to the span [params.start_time, params.end_time].

    Raises:
        OSError: if `labels_file` does not exist.
    """
    # The labels file must exist before we try to read it.
    if not os.path.exists(labels_file):
        raise OSError('Could not find labels file %s.' % labels_file)

    # Read the anomalies of the requested type from the YAML file.
    with open(labels_file, 'r') as fh:
        anomalies = yaml.safe_load(fh)[params.anomaly_type]

    # Convert timestamps to floats (unit days), sorted by start-time.
    anomaly_times = np.sort(datestring_to_float(anomalies))

    # Keep only the anomalies that fall inside the requested time span.
    in_span = (anomaly_times >= params.start_time) & (anomaly_times <= params.end_time)
    return anomaly_times[in_span]
# Initialize. num_timesteps = 0 scores_sum = 0 scores_min = 1e9 scores_max = -1e9 # Iterate over each folder, filling up the two lists above. for folder, labels_file in zip(folders, labels_files): # Load the labels for this file. with open(labels_file, 'r') as labels_file_object: crossing_datestrings = yaml.safe_load(labels_file_object)['change_points'] # Convert to float (units as days). crossing_times = datestring_to_float(crossing_datestrings) # Load the scores for this file. try: file_full_path = folder + '/' + algorithm + '.hdf5' with h5py.File(file_full_path, 'r') as filedata: scores = filedata['scores'][()] times = filedata['times'][()] except IOError: raise IOError('File %s cannot be found. Have you run these algorithms on the training set?' % (file_full_path)) # Update scores stats. num_timesteps += len(scores) scores_sum += np.sum(scores) scores_min = min(scores_min, np.min(scores)) scores_max = max(scores_max, np.max(scores))
def plot_all(algorithms, title, suffix, savefile):
    """Plot one scatter subplot per algorithm (crossing scores over time,
    sharing the x-axis), draw reference lines for each algorithm's
    min/mean/max score, and save the figure under ERROR_ANALYSIS_DIR.

    Parameters:
        algorithms: iterable of algorithm names.
        title: figure title format string; filled with LABELS_SUBDIR.
        suffix: filename suffix of each algorithm's crossings .npy file.
        savefile: output filename, relative to ERROR_ANALYSIS_DIR.
    """
    fig, axs = plt.subplots(nrows=len(algorithms), sharex=True, figsize=(10, 18))
    # Color cycle shared across subplots, so each algorithm gets its own color.
    colors = plt.rcParams['axes.prop_cycle']()
    for algorithm, ax in zip(algorithms, axs):
        # Load crossings with scores.
        # Column 0 holds the score, column 1 the timestring.
        all_crossings_file = ERROR_ANALYSIS_DIR + algorithm + suffix
        all_crossings = np.load(all_crossings_file)
        all_crossings_scores = np.array(all_crossings[:, 0], dtype=float)
        all_crossings_times = datestring_to_float(all_crossings[:, 1])

        # Create scatterplot.
        ax.scatter(all_crossings_times, all_crossings_scores, label=algorithm,
                   s=10, alpha=0.8, **next(colors))

        # Load scores summary (min, mean, max for this algorithm).
        scores_summary_file = ERROR_ANALYSIS_DIR + algorithm + '_scores_summary.npy'
        scores_min, scores_mean, scores_max = np.load(scores_summary_file)

        # Horizontal lines indicating min, mean and max of 'actual' scores.
        ax.axhline(y=scores_min, linestyle='-', c='gray', alpha=0.5)
        ax.axhline(y=scores_mean, linestyle='--', c='black')
        ax.axhline(y=scores_max, linestyle='-', c='gray', alpha=0.5)

        # Set x-axis tick range (the year 2004).
        start = datestring_to_float('01-01-2004/00:00:00')
        end = datestring_to_float('01-01-2005/00:00:00')
        ax.set_xlim(start, end)

        # Set x-axis formatting of dates.
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y/%H:%M'))
        ax.xaxis.set_tick_params(labelsize=8)

        # Tilts dates to the left for easier reading.
        plt.setp(ax.get_xticklabels(), rotation=30, ha='right')

    # Set title.
    fig.suptitle(title % LABELS_SUBDIR, y=0.92, fontweight='bold')

    # Set x-label. NOTE: `ax` here is the loop variable, i.e. the bottom
    # subplot -- sufficient because the x-axis is shared (sharex=True).
    ax.set_xlabel('Datetime')
    fig.text(x=0.02, y=0.5, s='Scores')

    # Show legend, common across all subplots. The dict de-duplicates
    # handles by label across all axes.
    labels_handles = {
        label: handle
        for ax in fig.axes
        for handle, label in zip(*ax.get_legend_handles_labels())
    }
    fig.legend(
        labels_handles.values(),
        labels_handles.keys(),
        loc='center right',
        title='$\\bf{Algorithm}$',
        fancybox=True,
        shadow=True,
    )

    # Fix dimensions.
    fig.subplots_adjust(left=0.15, bottom=0.15, right=0.7)

    # Save to file.
    fig.savefig(ERROR_ANALYSIS_DIR + savefile, dpi=fig.dpi, bbox_inches='tight')
def plot_worst(algorithms, title, suffix, savefile, num_samples=5):
    """For each algorithm, plot its `num_samples` worst-scoring detections
    side-by-side (one figure per algorithm), annotate nearby labelled
    events, and save each figure under ERROR_ANALYSIS_DIR.

    Parameters:
        algorithms: iterable of algorithm names.
        title: figure title format string; filled with (LABELS_SUBDIR, algorithm).
        suffix: filename suffix of each algorithm's detections .npy file.
        savefile: output filename format string; filled with the algorithm name.
        num_samples: number of worst detections to plot per algorithm.
    """
    for algorithm in algorithms:
        # Each algorithm gets its own figure.
        fig, axs = plt.subplots(ncols=num_samples, figsize=(25, 5))

        # Load crossings with scores. The file is assumed sorted so that the
        # first `num_samples` rows are the worst detections.
        worst_detections_file = ERROR_ANALYSIS_DIR + algorithm + suffix
        worst_detections = np.load(worst_detections_file)[:num_samples]

        # Parse each column: column 0 is the score, column 1 the timestring.
        worst_detections_scores = np.array(worst_detections[:, 0], dtype=float)
        worst_detections_times = worst_detections[:, 1]

        # Plot each of the worst crossings.
        for detection_time, detection_score, ax in zip(
                worst_detections_times, worst_detections_scores, axs):

            # Base ELS file name (without the .DAT extension) for this detection.
            els_basename = get_ELS_file_name(detection_time, remove_extension=True)

            # The time of the detection as a datetime object, and the size of
            # the window used for plotting (half the tolerance on each side).
            detection_time_dt = convert_to_dt(detection_time)
            time_diff = timedelta(minutes=TIME_TOLERANCE // 2)

            # Plot ELS data first.
            els_data_file = DATA_DIR + els_basename + '.DAT'
            plot_interpolated_ELS_data(fig, ax, els_data_file,
                                       start_time=detection_time_dt - time_diff,
                                       end_time=detection_time_dt + time_diff,
                                       colorbar_orientation='horizontal',
                                       quantity='anode5',
                                       blur_sigma=BLUR_SIGMA,
                                       bin_selection=BIN_SELECTION,
                                       filter=FILTER,
                                       filter_size=FILTER_SIZE)

            # Obtain the list of events occurring in this file.
            labels = list_of_events(els_basename, CROSSINGS_DIR)

            # How large is the width of the rectangle around each labelled event?
            days_per_minute = 1 / (24 * 60)
            window_size = 1 * days_per_minute

            # Annotate plot with labelled events that fall inside the
            # plotted time window.
            for label_type, crossing_timestring in labels:
                if detection_time_dt - time_diff <= convert_to_dt(
                        crossing_timestring) <= detection_time_dt + time_diff:
                    crossing_time = datestring_to_float(crossing_timestring)
                    ax.axvspan(crossing_time - window_size / 2,
                               crossing_time + window_size / 2,
                               facecolor=crossing_color(label_type), alpha=1)

            # Set title as the score.
            ax.set_title('Score %0.2f' % detection_score, pad=55)

        # Set title.
        fig.suptitle(title % (LABELS_SUBDIR, algorithm), x=0.45, y=0.92, fontweight='bold')

        # Fix dimensions.
        fig.subplots_adjust(left=0.15, bottom=0.15, right=0.7, top=0.65, wspace=0.4)

        # Save to file.
        fig.savefig(ERROR_ANALYSIS_DIR + savefile % algorithm, dpi=fig.dpi, bbox_inches='tight')
# One subplot for the ELS data, plus one per algorithm's scores.
fig, axs = plt.subplots(nrows=len(algorithm_files) + 1, figsize=(10, 40), sharex=True)

# Plot ELS data in the first subplot.
plot_interpolated_ELS_data(fig, axs[0], els_data_file, 'anode5',
                           colorbar_orientation='horizontal',
                           blur_sigma=BLUR_SIGMA,
                           bin_selection=BIN_SELECTION,
                           filter=FILTER, filter_size=FILTER_SIZE)

# Load labels.
with open(els_labels_file, 'r') as labels_file_object:
    labels = yaml.safe_load(labels_file_object)
    crossings = labels['change_points']

# Mark each crossing in the first plot with a window of the appropriate color.
color = crossing_color(LABELS_SUBDIR)
days_per_minute = 1/(24 * 60)
window_size = 5*days_per_minute
for crossing in crossings:
    crossing_time = datestring_to_float(crossing)
    axs[0].axvspan(crossing_time - window_size/2, crossing_time + window_size/2,
                   facecolor=color, alpha=1)

# Fill the remaining subplots with scores from each algorithm.
plotter = StatsPlotter()
for index, algorithm_file in enumerate(algorithm_files, start=1):
    with h5py.File(els_dir_full_path + algorithm_file, 'r') as filedata:
        scores = filedata['scores'][()]
        times = filedata['times'][()]
    plotter.plot_scores(fig, axs[index], times, scores)
    axs[index].set_xlabel(algorithm_file)

# Create plots directory, if it doesn't exist. mkdir(exist_ok=True) already
# tolerates an existing directory, so the previous os.path.exists() pre-check
# was redundant (and racy between check and create); call mkdir unconditionally.
Path(RANDOM_PLOTS_DIR).mkdir(parents=True, exist_ok=True)