Example #1
def init():
    """Initialize the jedi catalog: load the data

        Inputs:
            None.

        Optional Inputs:
            None

        Outputs:
            All outputs are globals accessible by doing import jedi_config
            logger [JpmLogger]:                               A configurable log that can optionally also print to console.
            all_minutes_since_last_flare [numpy float array]: The number of minutes elapsed between each pair of consecutive flares.
            preflare_indices [numpy int array]:               The indices of flares that are temporally independent of the preceding flare.

        Optional Outputs:
             None

        Example:
            jedi_config.init()
    """
    global logger, all_minutes_since_last_flare, preflare_indices

    # Initialize logger
    logger = JpmLogger(filename=logger_filename,
                       path=output_path,
                       console=False)
    logger.info('Logger initialized.')

    # Set up folders
    init_folders()

    # Set up filenames
    init_filenames()

    # Load the EVE data
    load_eve_data()

    # Get GOES flare events above C1 within date range corresponding to EVE data
    load_goes_flare_event_data()

    # Compute the amount of time between all flares [minutes]
    peak_time = goes_flare_events['peak_time']
    all_minutes_since_last_flare = (peak_time[1:] - peak_time[0:-1]).sec / 60.0

    # Figure out which flares are independent, store those indices
    is_flare_independent = all_minutes_since_last_flare > threshold_time_prior_flare_minutes
    preflare_indices = np.where(
        is_flare_independent
    )[0] + 1  # Add 1 to map back to event index and not to the differentiated vector
    logger.info(
        'Found {0} independent flares of {1} total flares given a time separation of {2} minutes.'
        .format(len(preflare_indices), len(is_flare_independent),
                threshold_time_prior_flare_minutes))
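
For reference, the independence test above is just a diff-and-threshold on the flare peak times, with a +1 offset to map from the differenced vector back to flare indices. A minimal, self-contained sketch of that logic (toy peak times and an assumed 240-minute threshold; astropy's Time is used as in the function above):

import numpy as np
from astropy.time import Time

threshold_time_prior_flare_minutes = 240.0  # assumed threshold for this sketch

# Toy peak times: flare 2 occurs only 60 minutes after flare 1
peak_time = Time(['2010-05-01T00:00:00', '2010-05-01T08:00:00',
                  '2010-05-01T09:00:00', '2010-05-02T00:00:00'])

# Minutes between consecutive flares (length N-1)
all_minutes_since_last_flare = (peak_time[1:] - peak_time[:-1]).sec / 60.0

# +1 maps from the differenced vector back to the flare event index
preflare_indices = np.where(all_minutes_since_last_flare > threshold_time_prior_flare_minutes)[0] + 1
print(preflare_indices)  # [1 3]: flares 1 and 3 are independent of their predecessors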
Example #2
def get_goes_flare_events(start_time,
                          end_time,
                          minimum_flare_size='C1',
                          verbose=False):
    """Get a list of flare events from NOAA's GOES/XRS. Just a wrapper around sunpy.instr.goes get_goes_event_list.

    Inputs:
        start_time [metatime or string]: The beginning of the time window of interest. See jpm_time_conversions.py
                                         (https://github.com/jmason86/python_convenience_functions/blob/master/jpm_time_conversions.py)
                                         for allowed metatime formats if not using an iso or human like time string.
        end_time [metatime or string]:   Same as start_time but for the end of the time window.

    Optional Inputs:
        minimum_flare_size [string]: The minimum flare size to search for. Default is 'C1'.
        verbose [bool]:              Set to log the processing messages to disk and console. Default is False.

    Outputs:
        goes_events [list]: The list of GOES flare events corresponding to the input search criteria.

    Optional Outputs:
        None

    Example:
        goes_events = get_goes_flare_events(pd.Timestamp('2010-05-01 00:00:00'),
                                            pd.Timestamp('2018-01-12 00:00:00'),
                                            verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        # TODO: Update the path
        logger = JpmLogger(filename='get_goes_flare_events_log',
                           path='/Users/jmason86/Desktop/')
        logger.info("Getting > {0} flares from {1} to {2}.".format(
            minimum_flare_size, start_time, end_time))

    if not isinstance(start_time, str):
        start_time = metatimes_to_human(np.array([start_time]))[0]
    if not isinstance(end_time, str):
        end_time = metatimes_to_human(np.array([end_time]))[0]
    time_range = TimeRange(start_time, end_time)
    goes_events = get_goes_event_list(time_range, goes_class_filter=minimum_flare_size)

    if verbose:
        logger.info("Found {0} events.".format(len(goes_events)))

    # Return the flare events
    return goes_events
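
At its core, the wrapper just builds a sunpy TimeRange and delegates. A minimal sketch of calling the underlying sunpy function directly (assumes a sunpy version in which sunpy.instr.goes.get_goes_event_list exists; it queries the HEK, so network access is required, and the event dictionary keys shown are assumptions):

from sunpy.time import TimeRange
from sunpy.instr.goes import get_goes_event_list

# Search a two-day window for flares of class C1 and above
time_range = TimeRange('2010-05-01 00:00:00', '2010-05-03 00:00:00')
goes_events = get_goes_event_list(time_range, goes_class_filter='C1')
for event in goes_events:
    print(event['goes_class'], event['peak_time'])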
def light_curve_peak_match_subtract(light_curve_to_subtract_from_df, light_curve_to_subtract_with_df, estimated_time_of_peak,
                                    max_seconds_shift=1800,
                                    plot_path_filename=None, verbose=False, logger=None):
    """Align the peak of a second light curve to the first, scale its magnitude to match, and subtract it off.

    Inputs:
        light_curve_to_subtract_from_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        light_curve_to_subtract_with_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        estimated_time_of_peak [metatime]: The estimated time that the peak should occur. This could come from, e.g., GOES/XRS.

    Optional Inputs:
        max_seconds_shift [int]:  The maximum allowed time shift in seconds to get the peaks to match.
        plot_path_filename [str]: Set to a path and filename in order to save the summary plot to disk.
                                  Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:           Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:       A configured logger from jpm_logger.py. If set to None, will generate a
                                  new one. Default is None.

    Outputs:
        light_curve_corrected_df [pd DataFrame]: A pandas DataFrame with the same format as light_curve_to_subtract_from_df but
                                                 with the resultant peak match and subtraction performed. Returns np.nan if
                                                 the peaks couldn't be found.
        seconds_shift [float]:                   The number of seconds that light_curve_to_subtract_with_df was shifted to get
                                                 its peak to match light_curve_to_subtract_from_df. Returns np.nan if
                                                 the peaks couldn't be found.
        scale_factor [float]:                    The multiplicative factor applied to light_curve_to_subtract_with_df to get
                                                 its peak to match light_curve_to_subtract_from_df. Returns np.nan if
                                                 the peaks couldn't be found.

    Optional Outputs:
        None

    Example:
        light_curve_corrected_df, seconds_shift, scale_factor = light_curve_peak_match_subtract(light_curve_to_subtract_from_df,
                                                                                                light_curve_to_subtract_with_df,
                                                                                                estimated_time_of_peak,
                                                                                                plot_path_filename='./',
                                                                                                verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='light_curve_peak_match_subtract_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_to_subtract_from_df.index[0]))

    # Drop NaNs since peakutils can't handle them
    light_curve_to_subtract_from_df = light_curve_to_subtract_from_df.dropna()
    light_curve_to_subtract_with_df = light_curve_to_subtract_with_df.dropna()

    # Detrend and find the peaks that are >= 95% of the max irradiance within
    if verbose:
        logger.info("Detrending light curves.")
    if (light_curve_to_subtract_from_df['irradiance'].values < 0).all():
        light_curve_to_subtract_from_df.iloc[0] = 1  # Else can crash peakutils.baseline
    base_from = peakutils.baseline(light_curve_to_subtract_from_df)
    detrend_from = light_curve_to_subtract_from_df - base_from
    indices_from = peakutils.indexes(detrend_from.values.squeeze(), thres=0.95)

    if (light_curve_to_subtract_with_df['irradiance'].values < 0).all():
        light_curve_to_subtract_with_df.iloc[0] = 1  # Else can crash peakutils.baseline
    base_with = peakutils.baseline(light_curve_to_subtract_with_df)
    detrend_with = light_curve_to_subtract_with_df - base_with
    indices_with = peakutils.indexes(detrend_with.values.squeeze(), thres=0.95)

    if len(indices_from) == 0:
        if verbose:
            logger.warning('Could not find peak in light curve to subtract from.')
        return np.nan, np.nan, np.nan
    if len(indices_with) == 0:
        if verbose:
            logger.warning('Could not find peak in light curve to subtract with.')
        return np.nan, np.nan, np.nan

    # Identify the peak closest to the input estimated peak time (e.g., from GOES/XRS)
    if verbose:
        logger.info("Identifying peaks closest to initial guess in light curves.")
    peak_index_from = indices_from[closest(light_curve_to_subtract_from_df.index[indices_from], estimated_time_of_peak)]
    peak_index_with = indices_with[closest(light_curve_to_subtract_with_df.index[indices_with], estimated_time_of_peak)]
    index_shift = peak_index_from - peak_index_with

    # Compute how many seconds the time shift corresponds to
    seconds_shift = (light_curve_to_subtract_from_df.index[peak_index_from] -
                     light_curve_to_subtract_with_df.index[peak_index_with]).total_seconds()

    # Fail if seconds_shift > max_seconds_shift
    isTimeShiftValid = True
    if abs(seconds_shift) > max_seconds_shift:
        if verbose:
            logger.warning("Cannot do peak match. Time shift of {0} seconds is greater than max allowed shift of {1} seconds.".format(seconds_shift, max_seconds_shift))
        isTimeShiftValid = False

    # Shift the subtract_with light curve in time to align its peak to the subtract_from light curve
    if isTimeShiftValid:
        if verbose:
            logger.info("Shifting and scaling the light curve to subtract with.")
        shifted_with = light_curve_to_subtract_with_df.shift(index_shift)

        # Scale the subtract_with light curve peak irradiance to match the subtract_from light curve peak irradiance
        scale_factor = (detrend_from.values[peak_index_from] / shifted_with.values[peak_index_with + index_shift])[0]
        shifted_scaled_with = shifted_with * scale_factor
        light_curve_corrected_df = light_curve_to_subtract_from_df - shifted_scaled_with

        if verbose:
            if light_curve_corrected_df.isnull().values.sum() > 0:
                logger.warning("%s points were shifted to become NaN." % light_curve_corrected_df.isnull().values.sum())
            logger.info("Light curve peak matching and subtraction complete.")

    if plot_path_filename:
        from jpm_number_printing import latex_float
        seconds_shift_string = '+' if seconds_shift >= 0 else ''
        seconds_shift_string += str(int(seconds_shift))
        if isTimeShiftValid:
            scale_factor_string = latex_float(scale_factor)

        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        plt.clf()
        fig, ax = plt.subplots()
        plt.plot(light_curve_to_subtract_from_df.index.values, light_curve_to_subtract_from_df.values, c='limegreen')
        plt.tick_params(axis='x', which='minor', labelbottom=False)
        plt.xlabel(estimated_time_of_peak)
        plt.ylabel('Irradiance [%]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())

        if isTimeShiftValid:
            plt.title('I: $\\times$' + scale_factor_string + ', t: ' + seconds_shift_string + ' s', color='tomato')
            shifted_scaled_with.plot(c='tomato', label='subtract with', ax=ax)
            light_curve_corrected_df.plot(c='darkgrey', label='result', ax=ax)
        else:
            plt.title('t: ' + seconds_shift_string + ' s > max allowed {0} s'.format(max_seconds_shift), color='tomato')
            plt.plot(light_curve_to_subtract_with_df.index.values, light_curve_to_subtract_with_df.values, c='tomato')
        plt.scatter(light_curve_to_subtract_from_df.index[peak_index_from], light_curve_to_subtract_from_df.values[peak_index_from], c='black')

        if isTimeShiftValid:
            plt.scatter(shifted_scaled_with.index[peak_index_with + index_shift], shifted_scaled_with.values[peak_index_with + index_shift], c='black')
            ax.legend(['subtract from', 'subtract with', 'result'], loc='best')
        else:
            plt.scatter(light_curve_to_subtract_with_df.index[peak_index_with], light_curve_to_subtract_with_df.values[peak_index_with], c='black')
            ax.legend(['subtract from', 'subtract with'], loc='best')

        path = os.path.dirname(plot_path_filename)
        if not os.path.exists(path):
            os.makedirs(path)
        plt.savefig(plot_path_filename)

        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    if isTimeShiftValid:
        return light_curve_corrected_df, seconds_shift, scale_factor
    else:
        return np.nan, seconds_shift, np.nan
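
The core operation above is: align the second peak to the first in time, scale it to the same amplitude, and subtract. A minimal numpy-only sketch of that idea (synthetic Gaussian light curves; np.argmax stands in for peakutils.indexes, and np.roll for the NaN-padding pandas shift used in the function):

import numpy as np

t = np.arange(200.0)  # time steps, e.g. minutes
curve_from = np.exp(-0.5 * ((t - 100) / 10.0) ** 2)        # peak at t=100, amplitude 1.0
curve_with = 0.5 * np.exp(-0.5 * ((t - 120) / 10.0) ** 2)  # peak at t=120, amplitude 0.5

# Locate the peaks and the index shift needed to align them
peak_index_from = np.argmax(curve_from)
peak_index_with = np.argmax(curve_with)
index_shift = peak_index_from - peak_index_with  # -20: shift 20 steps earlier

# Shift, scale to match the peak amplitude, then subtract
shifted_with = np.roll(curve_with, index_shift)
scale_factor = curve_from[peak_index_from] / shifted_with[peak_index_from]
corrected = curve_from - scale_factor * shifted_with
print(scale_factor, np.abs(corrected).max())  # 2.0 and ~0: identical shapes cancel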
def automatic_fit_light_curve(light_curve_df, minimum_score=0.3, plots_save_path=None,
                              verbose=False, logger=None):
    """Automatically fit the best support vector machine regression (SVR) model for the input light curve.

    Inputs:
        light_curve_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for irradiance and uncertainty.

    Optional Inputs:
        minimum_score [float]: Set this to the minimum explained variance score (0 - 1) acceptable for fits. If the
                               best fit score is < minimum_score, this function will return np.nan for light_curve_fit.
                               Default value is 0.3.
        plots_save_path [str]: Set to a path in order to save the validation curve and best fit overplot on the data to disk.
                               Default is None, meaning no plots will be saved to disk.
        verbose [bool]:        Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:    A configured logger from jpm_logger.py. If set to None, will generate a
                               new one. Default is None.

    Outputs:
        light_curve_fit_df [pd DataFrame]: A pandas DataFrame with a DatetimeIndex, and columns for fitted irradiance and uncertainty.
        best_fit_gamma [float]:            The best found gamma hyper parameter for the SVR.
        best_fit_score [float]:            The best explained variance score.

    Optional Outputs:
        None

    Example:
        light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(light_curve_df, verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='automatic_fit_light_curve_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0]))

    # Pull data out of the DataFrame for compatibility formatting
    X = metatimes_to_seconds_since_start(light_curve_df.index)
    y = light_curve_df['irradiance'].values

    # Check for NaNs and issue warning that they are being removed from the dataset
    if verbose:
        if np.isnan(y).any():
            logger.warning("There are NaN values in light curve. Dropping them.")
    finite_irradiance_indices = np.isfinite(y)
    X = X[finite_irradiance_indices]
    X = X.reshape(len(X), 1)  # Format to be compatible with validation_curve and SVR.fit()
    uncertainty = light_curve_df['uncertainty'][finite_irradiance_indices]
    y = y[finite_irradiance_indices]

    if verbose:
        logger.info("Fitting %s points." % len(y))

    # Helper function for compatibility with validation_curve
    def jpm_svr(gamma=1e-6, **kwargs):
        return make_pipeline(SVR(kernel='rbf', C=1e3, gamma=gamma, **kwargs))

    # The hyperparameter for SVR is gamma, so generate values of it to try
    gamma = np.logspace(-7, 1, num=20, base=10)

    # Overwrite the default scorer (R^2) with explained variance score
    evs = make_scorer(explained_variance_score)

    # Split the data between training/testing 50/50 but across the whole time range rather than the default consecutive KFolds
    import time
    t0 = time.time()
    shuffle_split = ShuffleSplit(n_splits=20, train_size=0.5, test_size=0.5, random_state=None)

    # Generate the validation curve -- test all them gammas!
    # Parallelized to speed it up (n_jobs = # of parallel threads)
    train_score, val_score = validation_curve(jpm_svr(), X, y,
                                              param_name='svr__gamma',
                                              param_range=gamma, cv=shuffle_split, n_jobs=7, scoring=evs)

    if verbose:
        logger.info("Validation curve complete. It took {0:.1f} seconds to run.".format(time.time() - t0))

    if plots_save_path:
        plt.clf()
        plt.style.use('jpm-transparent-light')
        plt.plot(gamma, np.median(train_score, 1), label='training score')
        plt.plot(gamma, np.median(val_score, 1), label='validation score')
        ax = plt.gca()
        plt.legend(loc='best')
        plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0])
        ax.set_xscale('log')
        plt.xlabel('gamma')
        plt.ylabel('score')
        plt.ylim(0, 1)
        filename = plots_save_path + 'Validation Curve t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png'
        plt.savefig(filename)
        if verbose:
            logger.info("Validation curve saved to %s" % filename)

    # Identify the best score
    scores = np.median(val_score, axis=1)
    best_fit_score = np.max(scores)
    best_fit_gamma = gamma[np.argmax(scores)]
    if verbose:
        logger.info('Scores: ' + str(scores))
        logger.info('Best score: ' + str(best_fit_score))
        logger.info('Best fit gamma: ' + str(best_fit_gamma))

    # Return np.nan if only got bad fits
    if best_fit_score < minimum_score:
        if verbose:
            logger.warning("Uh oh. Best fit score {0:.2f} is < user-defined minimum score {1:.2f}".format(best_fit_score, minimum_score))
        return np.nan, best_fit_gamma, best_fit_score

    # Otherwise train and fit the best model
    sample_weight = 1 / uncertainty
    model = SVR(kernel='rbf', C=1e3, gamma=best_fit_gamma).fit(X, y, sample_weight)
    y_fit = model.predict(X)

    if verbose:
        logger.info("Best model trained and fitted.")

    if plots_save_path:
        plt.clf()
        plt.errorbar(X.ravel(), y, yerr=uncertainty, color='black', fmt='o', label='Input light curve')
        plt.plot(X.ravel(), y_fit, linewidth=6, label='Fit')
        plt.title("t$_0$ = " + datetimeindex_to_human(light_curve_df.index)[0])
        plt.xlabel('time [seconds since start]')
        plt.ylabel('irradiance [%]')
        plt.legend(loc='best')
        filename = plots_save_path + 'Fit t0 ' + datetimeindex_to_human(light_curve_df.index)[0] + '.png'
        plt.savefig(filename)
        if verbose:
            logger.info("Fitted curve saved to %s" % filename)

    # TODO: Get uncertainty of fit at each point... if that's even possible
    # Placeholder for now just so that the function can complete: output uncertainty = input uncertainty
    fit_uncertainty = uncertainty

    # Construct a pandas DataFrame with DatetimeIndex, y_fit, and fit_uncertainty
    light_curve_fit_df = pd.DataFrame({'irradiance': y_fit,
                                       'uncertainty': fit_uncertainty})
    light_curve_fit_df.index = light_curve_df.index[finite_irradiance_indices]
    if verbose:
        logger.info("Created output DataFrame")

    return light_curve_fit_df, best_fit_gamma, best_fit_score
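
The gamma search above follows the standard scikit-learn validation-curve pattern: sweep the hyperparameter, score each value on shuffled train/validation splits, and keep the gamma with the best median validation score. A minimal sketch of that pattern on synthetic data (the toy signal and grid are illustrative; scoring here uses SVR's default R^2 rather than explained variance):

import numpy as np
from sklearn.model_selection import ShuffleSplit, validation_curve
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR

# Toy light curve: a smooth dimming dip plus noise
X = np.arange(0, 500, 5.0).reshape(-1, 1)  # seconds since start
y = -np.exp(-0.5 * ((X.ravel() - 250) / 60.0) ** 2) + np.random.normal(0, 0.05, X.shape[0])

gamma = np.logspace(-7, 1, num=20, base=10)  # hyperparameter grid, as in the function above
cv = ShuffleSplit(n_splits=20, train_size=0.5, test_size=0.5)

# make_pipeline names the SVR step 'svr', hence the 'svr__gamma' parameter path
train_score, val_score = validation_curve(make_pipeline(SVR(kernel='rbf', C=1e3)), X, y,
                                          param_name='svr__gamma', param_range=gamma, cv=cv)

best_fit_gamma = gamma[np.argmax(np.median(val_score, axis=1))]
print('Best gamma: {0:.2g}'.format(best_fit_gamma))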
def determine_dimming_slope(light_curve_df,
                            earliest_allowed_time=None,
                            latest_allowed_time=None,
                            smooth_points=0,
                            plot_path_filename=None,
                            verbose=False,
                            logger=None):
    """Find the slope of dimming in a light curve, if any.

    Inputs:
        light_curve_df [pd DataFrame]:    A pandas DataFrame with a DatetimeIndex and a column for irradiance.

    Optional Inputs:
        earliest_allowed_time [metatime]: The function won't return a slope determined any earlier than this.
                                          It is recommended that this be the peak time of the flare.
                                          Default is None, meaning the beginning of the light_curve_df.
        latest_allowed_time [metatime]:   The function won't return a slope determined any later than this.
                                          It is recommended that this be the identified time of dimming depth.
                                          Default is None, meaning the end of the light_curve_df.
        smooth_points [integer]:          Used to apply a rolling mean with the number of points (indices) specified.
                                          Default is 0, meaning no smoothing will be performed.
        plot_path_filename [str]:         Set to a path and filename in order to save the summary plot to disk.
                                          Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                   Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:               A configured logger from jpm_logger.py. If set to None, will generate a
                                          new one. Default is None.

    Outputs:
        slope_min [float]:  The minimum slope of dimming in percent/second terms.
        slope_max [float]:  The maximum slope of dimming in percent/second terms.
        slope_mean [float]: The mean slope of dimming in percent/second terms.

    Optional Outputs:
        None

    Example:
        slope_min, slope_max, slope_mean = determine_dimming_slope(light_curve_df,
                                                                   plot_path_filename='./determine_dimming_slope_summary.png',
                                                                   verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_dimming_slope_log',
                               path='/Users/jmason86/Desktop/')
        logger.info(
            "Running on event with light curve start time of {0}.".format(
                light_curve_df.index[0]))

    # If no earliest_allowed_time set, then set it to beginning of light_curve_df
    if not earliest_allowed_time:
        earliest_allowed_time = light_curve_df.index[0]
        if verbose:
            logger.info(
                "No earliest allowed time provided. Setting to beginning of light curve: {0}"
                .format(earliest_allowed_time))

    # If no latest_allowed_time set, then set it to end of light_curve_df
    if not latest_allowed_time:
        latest_allowed_time = light_curve_df.index[-1]
        if verbose:
            logger.info(
                "No latest allowed time provided. Setting to end of light curve: {0}"
                .format(latest_allowed_time))

    # Optionally smooth the light curve with a rolling mean
    if smooth_points:
        light_curve_df['irradiance'] = light_curve_df['irradiance'].rolling(
            smooth_points, center=True).mean()
        if verbose:
            logger.info('Applied {0} point smooth.'.format(smooth_points))

    # Fill any NaNs with the first valid value so that idxmax and diff below behave
    first_non_nan = light_curve_df['irradiance'].first_valid_index()
    nan_indices = np.isnan(light_curve_df['irradiance'])
    light_curve_df.loc[nan_indices, 'irradiance'] = light_curve_df['irradiance'][first_non_nan]

    # Find the max in the allowed window
    max_time = light_curve_df[earliest_allowed_time:latest_allowed_time][
        'irradiance'].idxmax()
    max_irradiance = light_curve_df['irradiance'].loc[max_time]
    if verbose:
        logger.info(
            'Maximum in allowed window found with value of {0:.2f} at time {1}'
            .format(max_irradiance, max_time))

    # Compute the derivative in the time window of interest [%/s]
    # (sign inverted so that dimming yields a positive "downward slope")
    light_curve_window = light_curve_df[max_time:latest_allowed_time]['irradiance']
    derivative = -light_curve_window.diff() / light_curve_window.index.to_series().diff().dt.total_seconds()
    if verbose:
        logger.info(
            "Computed derivative of light curve within time window of interest."
        )

    # Get the min, max, and mean slope
    slope_min = derivative.min()
    slope_max = derivative.max()
    slope_mean = derivative.mean()
    slope_min_str = latex_float(slope_min)
    slope_max_str = latex_float(slope_max)
    slope_mean_str = latex_float(slope_mean)
    if verbose:
        logger.info(
            "Computed min ({0}), max ({1}), and mean ({2}) %/s slope.".format(
                slope_min_str, slope_max_str, slope_mean_str))

    # Do a few sanity checks for the log
    if verbose:
        if slope_min < 0:
            logger.warning(
                "Minimum slope of {0} is unexpectedly < 0.".format(slope_min))
        if slope_max < 0:
            logger.warning(
                "Maximum slope of {0} is unexpectedly < 0.".format(slope_max))
        if slope_mean < 0:
            logger.warning(
                "Mean slope of {0} is unexpectedly < 0.".format(slope_mean))

    # Produce a summary plot
    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        p = plt.plot(light_curve_df['irradiance'])
        p = plt.plot(
            light_curve_df[max_time:latest_allowed_time]['irradiance'],
            label='slope region')
        ax = plt.gca()
        plt.axvline(x=earliest_allowed_time, linestyle='dashed', color='grey')
        plt.axvline(x=latest_allowed_time, linestyle='dashed', color='black')
        plt.axvline(x=max_time, linestyle='dashed', color='black')
        plt.title('Identified Slope')

        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        ax.xaxis.grid(True, which='minor')
        plt.ylabel('Irradiance [%]')

        inverse_str = '$^{-1}$'
        plt.annotate('slope_min={0} % sec{1}'.format(slope_min_str,
                                                     inverse_str),
                     xy=(0.98, 0.12),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())
        plt.annotate('slope_max={0} % sec{1}'.format(slope_max_str,
                                                     inverse_str),
                     xy=(0.98, 0.08),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())
        plt.annotate('slope_mean={0} % sec{1}'.format(slope_mean_str,
                                                      inverse_str),
                     xy=(0.98, 0.04),
                     xycoords='axes fraction',
                     ha='right',
                     size=12,
                     color=p[0].get_color())

        ax.legend(loc='best')

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    # Return the slopes
    return slope_min, slope_max, slope_mean
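
The slope parameterization reduces to a first difference of irradiance over elapsed seconds, negated so that dimming yields positive slopes. A minimal sketch with a toy pandas light curve (values illustrative only):

import numpy as np
import pandas as pd

# Toy light curve: irradiance [%] dropping ~3% over an hour, one point per minute
index = pd.date_range('2010-05-01 00:00', periods=60, freq='min')
irradiance = np.linspace(0, -3, 60) + np.random.normal(0, 0.01, 60)
light_curve_df = pd.DataFrame({'irradiance': irradiance}, index=index)

# Downward slope in %/s: negate the first difference and divide by elapsed seconds
derivative = -light_curve_df['irradiance'].diff() / light_curve_df.index.to_series().diff().dt.total_seconds()

slope_min, slope_max, slope_mean = derivative.min(), derivative.max(), derivative.mean()
print(slope_min, slope_max, slope_mean)  # mean ~ 8.5e-4 %/s for this toy dip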
Example #6
def generate_jedi_catalog(
        threshold_time_prior_flare_minutes=240.0,
        dimming_window_relative_to_flare_minutes_left=0.0,
        dimming_window_relative_to_flare_minutes_right=240.0,
        threshold_minimum_dimming_window_minutes=120.0,
        flare_index_range=range(0, 5052),
        output_path='/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/',
        verbose=True):
    """Wrapper code for creating James's Extreme Ultraviolet Variability Experiment (EVE) Dimming Index (JEDI) catalog.

    Inputs:
        None.

    Optional Inputs:
        threshold_time_prior_flare_minutes [float]:             How long before a given event the previous flare must have occurred
                                                                for the current event to be considered independent. If the previous
                                                                flare was too recent, that event's pre-flare irradiance is reused.
                                                                Default is 240 (4 hours).
        dimming_window_relative_to_flare_minutes_left [float]:  Defines the left side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. Negative numbers mean
                                                                minutes prior to the flare peak. Default is 0.0.
        dimming_window_relative_to_flare_minutes_right [float]: Defines the right side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. If another flare
                                                                occurs before this, that time will define the end of the
                                                                window instead. Default is 240 (4 hours).
        threshold_minimum_dimming_window_minutes [float]:       The smallest allowed time window in which to search for dimming.
                                                                Default is 120.
        flare_index_range [range]:                              The range of GOES flare indices to process. Default is range(0, 5052).
        output_path [str]:                                      Set to a path for saving the JEDI catalog table and processing
                                                                summary plots. Default is '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/'.
        verbose [bool]:                                         Set to log the processing messages to disk and console. Default is True.

    Outputs:
        No direct return, but writes a (csv? sql table? hdf5?) to disk with the dimming parameterization results.
        Subroutines also optionally save processing plots to disk in output_path.

    Optional Outputs:
        None

    Example:
        generate_jedi_catalog(output_path='/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/',
                              verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        logger = JpmLogger(filename='generate_jedi_catalog',
                           path=output_path,
                           console=False)
        logger.info("Starting JEDI processing pipeline.")
        logger.info("Processing events {0} - {1}".format(
            flare_index_range[0], flare_index_range[-1]))
    else:
        logger = None

    # Get EVE level 2 extracted emission lines data
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from scipy.io.idl import readsav
    eve_readsav = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav'
    )
    if verbose:
        logger.info('Loaded EVE data')

    # Create metadata dictionary
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from sunpy.util.metadata import MetaDict
    metadata = MetaDict()
    metadata['ion'] = eve_readsav['name']
    metadata['temperature_ion_peak_formation'] = np.power(
        10.0, eve_readsav['logt']) * u.Kelvin
    metadata['extracted_wavelength_center'] = eve_readsav['wavelength'] * u.nm
    metadata['extracted_wavelength_min'] = metadata[
        'extracted_wavelength_center']
    metadata['extracted_wavelength_max'] = metadata[
        'extracted_wavelength_center']
    metadata['emission_line_blends'] = ['none', 'yay', 'poop', 'Fe vi']  # etc
    metadata[
        'exposure_time'] = 60.0 * u.second  # These example EVE data are already binned down to 1 minute
    metadata['precision'] = ['Not implemented in prototype']
    metadata['accuracy'] = ['Not implemented in prototype']
    metadata['flags'] = ['Not implemented in prototype']
    metadata['flags_description'] = '1 = MEGS-A data is missing, ' \
                                    '2 = MEGS-B data is missing, ' \
                                    '4 = ESP data is missing, ' \
                                    '8 = MEGS-P data is missing, ' \
                                    '16 = Possible clock adjust in MEGS-A, ' \
                                    '32 = Possible clock adjust in MEGS-B, ' \
                                    '64 = Possible clock adjust in ESP, ' \
                                    '128 = Possible clock adjust in MEGS-P'
    metadata['flags_spacecraft'] = ['Not implemented in prototype']
    metadata['flags_spacecraft_description'] = '0 = No obstruction, ' \
                                               '1 = Warm up from Earth eclipse, ' \
                                               '2 = Obstruction atmosphere penumbra, ' \
                                               '3 = Obstruction atmosphere umbra, ' \
                                               '4 = Obstruction penumbra of Mercury, ' \
                                               '5 = Obstruction penumbra of Mercury, ' \
                                               '6 = Obstruction penumbra of Venus, ' \
                                               '7 = Obstruction umbra of Venus, ' \
                                               '8 = Obstruction penumbra of Moon, ' \
                                               '9 = Obstruction umbra of Moon, ' \
                                               '10 = Obstruction penumbra of solid Earth, ' \
                                               '11 = Obstruction umbra of solid Earth, ' \
                                               '16 = Observatory is off-pointed by more than 1 arcmin'
    metadata['data_version'] = ['Not implemented in prototype']
    metadata['data_reprocessed_revision'] = ['Not implemented in prototype']
    metadata['filename'] = ['Not implemented in prototype']

    # Load up the actual irradiance data into a pandas DataFrame
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    irradiance = eve_readsav['irradiance'].byteswap().newbyteorder(
    )  # pandas doesn't like big endian
    irradiance[irradiance == -1] = np.nan
    wavelengths = eve_readsav['wavelength']
    wavelengths_str = ['{0:1.1f}'.format(wavelength) for wavelength in wavelengths]
    eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str)
    eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str))
    eve_lines = eve_lines.drop_duplicates()

    # slice out only columns needed by Shawn
    # eve_selected_lines = eve_lines.drop(columns=['9.4', '13.1', '13.3', '25.6', '28.4', '30.4', '33.5', '36.1', '36.8', '44.6', '46.5', '49.9', '52.1', '52.6', '53.7', '55.4', '56.8', '58.4', '59.2', '60.0', '61.0', '62.5', '63.0', '71.9', '72.2', '77.0', '79.0', '83.6', '95.0', '97.3', '97.7', '102.6', '103.2'])
    # eve_selected_lines.info()
    # eve_selected_lines.to_csv('/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/eve_selected_lines_forreal.csv')

    # Get GOES flare events above C1 within date range corresponding to EVE data
    # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1], verbose=verbose)  # TODO: The method in sunpy needs fixing, issue 2434

    # Load GOES events from IDL saveset instead of directly through sunpy
    goes_flare_events = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/GoesEventsMegsAEra.sav'
    )
    goes_flare_events['class'] = goes_flare_events['class'].astype(str)
    goes_flare_events['event_peak_time_human'] = goes_flare_events[
        'event_peak_time_human'].astype(str)
    goes_flare_events['event_start_time_human'] = goes_flare_events[
        'event_start_time_human'].astype(str)
    goes_flare_events['peak_time'] = Time(
        goes_flare_events['event_peak_time_jd'], format='jd', scale='utc')
    goes_flare_events['start_time'] = Time(
        goes_flare_events['event_start_time_jd'], format='jd', scale='utc')
    if verbose:
        logger.info('Loaded GOES flare events.')

    # Define the columns of the JEDI catalog
    jedi_row = pd.DataFrame([
        OrderedDict([('Event #', np.nan), ('GOES Flare Start Time', np.nan),
                     ('GOES Flare Peak Time', np.nan),
                     ('GOES Flare Class', np.nan),
                     ('Pre-Flare Start Time', np.nan),
                     ('Pre-Flare End Time', np.nan),
                     ('Flare Interrupt', np.nan)])
    ])
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns +
                     ' Pre-Flare Irradiance [W/m2]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Min [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Max [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Mean [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Uncertainty [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth Uncertainty [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Fitting Gamma'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Fitting Score'))

    ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2))
    ion_permutations = pd.Index(
        [' by '.join(ion_tuples[i]) for i in range(len(ion_tuples))])

    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Min [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Max [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Mean [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Uncertainty [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth Uncertainty [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Correction Time Shift [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Correction Scale Factor'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Fitting Gamma'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Fitting Score'))

    csv_filename = output_path + 'jedi_{0}.csv'.format(Time.now().iso)
    jedi_row.to_csv(csv_filename, header=True, index=False, mode='w')

    if verbose:
        logger.info('Created JEDI row definition.')

    # Start a progress bar
    widgets = [
        progressbar.Percentage(),
        progressbar.Bar(),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]
    progress_bar = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('Flare Event Loop: ')] + widgets,
        min_value=flare_index_range[0],
        max_value=flare_index_range[-1]).start()

    # Prepare hold-over pre-flare values, reused when the current flare is too close
    # to the previous one to be independent. The irradiance will normally have one
    # element for each of the 39 emission lines.
    preflare_irradiance = np.nan
    preflare_window_start = np.nan
    preflare_window_end = np.nan

    # Start loop through all flares
    for flare_index in flare_index_range:

        # Skip event 0 to avoid problems with referring to earlier indices
        if flare_index == 0:
            continue

        # Reset jedi_row
        jedi_row[:] = np.nan

        # Reset the flare interrupt flag
        flare_interrupt = False

        # Fill the GOES flare information into the JEDI row
        jedi_row['Event #'] = flare_index
        jedi_row['GOES Flare Start Time'] = goes_flare_events['start_time'][
            flare_index].iso
        jedi_row['GOES Flare Peak Time'] = goes_flare_events['peak_time'][
            flare_index].iso
        jedi_row['GOES Flare Class'] = goes_flare_events['class'][flare_index]
        if verbose:
            logger.info(
                "Event {0} GOES flare details stored to JEDI row.".format(
                    flare_index))

        # If haven't already done all pre-parameterization processing
        processed_jedi_non_params_filename = output_path + 'Processed Pre-Parameterization Data/Event {0} Pre-Parameterization.h5'.format(
            flare_index)
        processed_lines_filename = output_path + 'Processed Lines Data/Event {0} Lines.h5'.format(
            flare_index)
        if not os.path.isfile(processed_lines_filename) or not os.path.isfile(
                processed_jedi_non_params_filename):
            # Determine pre-flare irradiance
            minutes_since_last_flare = (
                goes_flare_events['peak_time'][flare_index] -
                goes_flare_events['peak_time'][flare_index - 1]).sec / 60.0
            if minutes_since_last_flare > threshold_time_prior_flare_minutes:
                # Clip EVE data from threshold_time_prior_flare_minutes prior to flare up to peak flare time
                preflare_window_start = (
                    goes_flare_events['peak_time'][flare_index] -
                    (threshold_time_prior_flare_minutes * u.minute)).iso
                preflare_window_end = (
                    goes_flare_events['peak_time'][flare_index]).iso
                eve_lines_preflare_time = eve_lines[
                    preflare_window_start:preflare_window_end]

                # Loop through the emission lines and get pre-flare irradiance for each
                preflare_irradiance = []
                for column in eve_lines_preflare_time:
                    eve_line_preflare_time = pd.DataFrame(
                        eve_lines_preflare_time[column])
                    eve_line_preflare_time.columns = ['irradiance']
                    preflare_irradiance.append(
                        determine_preflare_irradiance(
                            eve_line_preflare_time,
                            pd.Timestamp(goes_flare_events['start_time']
                                         [flare_index].iso),
                            plot_path_filename=output_path +
                            'Preflare Determination/Event {0} {1}.png'.format(
                                flare_index, column),
                            verbose=verbose,
                            logger=logger))
                    plt.close('all')
            else:
                if verbose:
                    logger.info(
                        "This flare at {0} will use the pre-flare irradiance from flare at {1}."
                        .format(
                            goes_flare_events['peak_time'][flare_index].iso,
                            goes_flare_events['peak_time'][flare_index - 1].iso))

            jedi_row["Pre-Flare Start Time"] = preflare_window_start
            jedi_row["Pre-Flare End Time"] = preflare_window_end
            preflare_irradiance_cols = [
                col for col in jedi_row.columns
                if 'Pre-Flare Irradiance' in col
            ]
            jedi_row[preflare_irradiance_cols] = preflare_irradiance

            if verbose:
                logger.info(
                    "Event {0} pre-flare determination complete.".format(
                        flare_index))

            # Clip EVE data to dimming window
            bracket_time_left = (
                goes_flare_events['peak_time'][flare_index] -
                (dimming_window_relative_to_flare_minutes_left * u.minute))
            next_flare_time = Time(
                (goes_flare_events['peak_time'][flare_index + 1]).iso)
            user_choice_time = (
                goes_flare_events['peak_time'][flare_index] +
                (dimming_window_relative_to_flare_minutes_right * u.minute))
            bracket_time_right = min(next_flare_time, user_choice_time)

            # If flare is shortening the window, set the flare_interrupt flag
            if bracket_time_right == next_flare_time:
                flare_interrupt = True
                if verbose:
                    logger.info(
                        'Flare interrupt for event at {0} by flare at {1}'.
                        format(goes_flare_events['peak_time'][flare_index].iso,
                               next_flare_time))

            # Write flare_interrupt to JEDI row
            jedi_row['Flare Interrupt'] = flare_interrupt

            # Skip event if the dimming window is too short
            if ((bracket_time_right - bracket_time_left).sec /
                    60.0) < threshold_minimum_dimming_window_minutes:
                # Leave all dimming parameters as NaN and write this null result to the CSV on disk
                jedi_row.to_csv(csv_filename,
                                header=False,
                                index=False,
                                mode='a')

                # Log message
                if verbose:
                    logger.info(
                        'The dimming window duration of {0} minutes is shorter than the minimum threshold of {1} minutes. Skipping this event ({2})'
                        .format(((bracket_time_right - bracket_time_left).sec /
                                 60.0),
                                threshold_minimum_dimming_window_minutes,
                                goes_flare_events['peak_time'][flare_index]))

                # Skip the rest of the processing in the flare_index loop
                continue
            else:
                eve_lines_event = eve_lines[bracket_time_left.iso:bracket_time_right.iso]

            if verbose:
                logger.info(
                    "Event {0} EVE data clipped to dimming window.".format(
                        flare_index))

            # Convert irradiance units to percent
            # (in place, don't care about absolute units from this point forward)
            eve_lines_event = (eve_lines_event - preflare_irradiance
                               ) / preflare_irradiance * 100.0

            if verbose:
                logger.info(
                    "Event {0} irradiance converted from absolute to percent units."
                    .format(flare_index))

            # Do flare removal in the light curves and add the results to the DataFrame
            progress_bar_correction = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Peak Match Subtract: ')] +
                widgets,
                max_value=len(ion_tuples)).start()
            for i in range(len(ion_tuples)):
                light_curve_to_subtract_from_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][0]])
                light_curve_to_subtract_from_df.columns = ['irradiance']
                light_curve_to_subtract_with_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][1]])
                light_curve_to_subtract_with_df.columns = ['irradiance']

                if (light_curve_to_subtract_from_df.isnull().all().all()) or (
                        light_curve_to_subtract_with_df.isnull().all().all()):
                    if verbose:
                        logger.info(
                            'Event {0} {1} correction skipped because all irradiances are NaN.'
                            .format(flare_index, ion_permutations[i]))
                else:
                    light_curve_corrected, seconds_shift, scale_factor = light_curve_peak_match_subtract(
                        light_curve_to_subtract_from_df,
                        light_curve_to_subtract_with_df,
                        pd.Timestamp(
                            (goes_flare_events['peak_time'][flare_index]).iso),
                        plot_path_filename=output_path +
                        'Peak Subtractions/Event {0} {1}.png'.format(
                            flare_index, ion_permutations[i]),
                        verbose=verbose,
                        logger=logger)

                    eve_lines_event[
                        ion_permutations[i]] = light_curve_corrected
                    jedi_row[ion_permutations[i] +
                             ' Correction Time Shift [s]'] = seconds_shift
                    jedi_row[ion_permutations[i] +
                             ' Correction Scale Factor'] = scale_factor

                    plt.close('all')

                    if verbose:
                        logger.info(
                            'Event {0} flare removal correction complete'.
                            format(flare_index))
                    progress_bar_correction.update(i)

            progress_bar_correction.finish()

            # TODO: Update calculate_eve_fe_line_precision to compute for all emission lines, not just selected
            uncertainty = np.ones(len(eve_lines_event)) * 0.002545

            # TODO: Propagate uncertainty through light_curve_peak_match_subtract and store in eve_lines_event

            # Fit the light curves to reduce influence of noise on the parameterizations to come later
            progress_bar_fitting = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Light curve fitting: ')] +
                widgets,
                max_value=len(eve_lines_event.columns)).start()
            for i, column in enumerate(eve_lines_event):
                if eve_lines_event[column].isnull().all().all():
                    if verbose:
                        logger.info(
                            'Event {0} {1} fitting skipped because all irradiances are NaN.'
                            .format(flare_index, column))
                else:
                    eve_line_event = pd.DataFrame(eve_lines_event[column])
                    eve_line_event.columns = ['irradiance']
                    eve_line_event['uncertainty'] = uncertainty

                    fitting_path = output_path + 'Fitting/'
                    if not os.path.exists(fitting_path):
                        os.makedirs(fitting_path)

                    plt.close('all')
                    light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(
                        eve_line_event,
                        plots_save_path='{0} Event {1} {2} '.format(
                            fitting_path, flare_index, column),
                        verbose=verbose,
                        logger=logger)
                    eve_lines_event[column] = light_curve_fit
                    jedi_row[column + ' Fitting Gamma'] = best_fit_gamma
                    jedi_row[column + ' Fitting Score'] = best_fit_score

                    if verbose:
                        logger.info(
                            'Event {0} {1} light curves fitted.'.format(
                                flare_index, column))
                    progress_bar_fitting.update(i)

            progress_bar_fitting.finish()

        #     # Save the dimming event data to disk for quicker restore
        #     jedi_row.to_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event.to_hdf(processed_lines_filename, 'eve_lines_event')
        # else:
        #     jedi_row = pd.read_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event = pd.read_hdf(processed_lines_filename, 'eve_lines_event')
        #     if verbose:
        #         logger.info('Loading files {0} and {1} rather than processing again.'.format(processed_jedi_non_params_filename, processed_lines_filename))
        #
        # # Parameterize the light curves for dimming
        # for column in eve_lines_event:
        #
        #     # Null out all parameters
        #     depth_percent, depth_time = np.nan, np.nan
        #     slope_start_time, slope_end_time = np.nan, np.nan
        #     slope_min, slope_max, slope_mean = np.nan, np.nan, np.nan
        #     duration_seconds, duration_start_time, duration_end_time = np.nan, np.nan, np.nan
        #
        #     # Determine whether to do the parameterizations or not
        #     if eve_lines_event[column].isnull().all().all():
        #         if verbose:
        #             logger.info('Event {0} {1} parameterization skipped because all irradiances are NaN.'.format(flare_index, column))
        #     else:
        #         eve_line_event = pd.DataFrame(eve_lines_event[column])
        #         eve_line_event.columns = ['irradiance']
        #
        #         # Determine dimming depth (if any)
        #         depth_path = output_path + 'Depth/'
        #         if not os.path.exists(depth_path):
        #             os.makedirs(depth_path)
        #
        #         plt.close('all')
        #         depth_percent, depth_time = determine_dimming_depth(eve_line_event,
        #                                                             plot_path_filename='{0} Event {1} {2} Depth.png'.format(depth_path, flare_index, column),
        #                                                             verbose=verbose, logger=logger)
        #
        #         jedi_row[column + ' Depth [%]'] = depth_percent
        #         # jedi_row[column + ' Depth Uncertainty [%]'] = depth_uncertainty  # TODO: make determine_dimming_depth return the propagated uncertainty
        #         jedi_row[column + ' Depth Time'] = depth_time
        #
        #         # Determine dimming slope (if any)
        #         slope_path = output_path + 'Slope/'
        #         if not os.path.exists(slope_path):
        #             os.makedirs(slope_path)
        #
        #         slope_start_time = pd.Timestamp((goes_flare_events['peak_time'][flare_index]).iso)
        #         slope_end_time = depth_time
        #
        #         if (pd.isnull(slope_start_time)) or (pd.isnull(slope_end_time)):
        #             if verbose:
        #                 logger.warning('Cannot compute slope or duration because slope bounding times NaN.')
        #         else:
        #             plt.close('all')
        #             slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event,
        #                                                                        earliest_allowed_time=slope_start_time,
        #                                                                        latest_allowed_time=slope_end_time,
        #                                                                        plot_path_filename='{0} Event {1} {2} Slope.png'.format(slope_path, flare_index, column),
        #                                                                        verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Slope Min [%/s]'] = slope_min
        #             jedi_row[column + ' Slope Max [%/s]'] = slope_max
        #             jedi_row[column + ' Slope Mean [%/s]'] = slope_mean
        #             # jedi_row[column + ' Slope Uncertainty [%]'] = slope_uncertainty  # TODO: make determine_dimming_slope return the propagated uncertainty
        #             jedi_row[column + ' Slope Start Time'] = slope_start_time
        #             jedi_row[column + ' Slope End Time'] = slope_end_time
        #
        #             # Determine dimming duration (if any)
        #             duration_path = output_path + 'Duration/'
        #             if not os.path.exists(duration_path):
        #                 os.makedirs(duration_path)
        #
        #             plt.close('all')
        #             duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event,
        #                                                                                                   earliest_allowed_time=slope_start_time,
        #                                                                                                   plot_path_filename='{0} Event {1} {2} Duration.png'.format(duration_path, flare_index, column),
        #                                                                                                   verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Duration [s]'] = duration_seconds
        #             jedi_row[column + ' Duration Start Time'] = duration_start_time
        #             jedi_row[column + ' Duration End Time'] = duration_end_time
        #
        #         if verbose:
        #             logger.info("Event {0} {1} parameterizations complete.".format(flare_index, column))
        #
        #         # Produce a summary plot for each light curve
        #         plt.style.use('jpm-transparent-light')
        #
        #         ax = eve_line_event['irradiance'].plot(color='black')
        #         plt.axhline(linestyle='dashed', color='grey')
        #         start_date = eve_line_event.index.values[0]
        #         start_date_string = pd.to_datetime(str(start_date))
        #         plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        #         plt.ylabel('Irradiance [%]')
        #         fmtr = dates.DateFormatter("%H:%M:%S")
        #         ax.xaxis.set_major_formatter(fmtr)
        #         ax.xaxis.set_major_locator(dates.HourLocator())
        #         plt.title('Event {0} {1} nm Parameters'.format(flare_index, column))
        #
        #         if not np.isnan(depth_percent):
        #             plt.annotate('', xy=(depth_time, -depth_percent), xycoords='data',
        #                          xytext=(depth_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='limegreen', edgecolor='limegreen', linewidth=2))
        #             mid_depth = -depth_percent / 2.0
        #             plt.annotate('{0:.2f} %'.format(depth_percent), xy=(depth_time, mid_depth), xycoords='data',
        #                          ha='right', va='center', rotation=90, size=18, color='limegreen')
        #
        #         if not np.isnan(slope_mean):
        #             if pd.isnull(slope_start_time) or pd.isnull(slope_end_time):
        #                 import pdb
        #                 pdb.set_trace()
        #             p = plt.plot(eve_line_event[slope_start_time:slope_end_time]['irradiance'], c='tomato')
        #
        #             inverse_str = '$^{-1}$'
        #             plt.annotate('slope_min={0} % s{1}'.format(latex_float(slope_min), inverse_str),
        #                          xy=(0.98, 0.12), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_max={0} % s{1}'.format(latex_float(slope_max), inverse_str),
        #                          xy=(0.98, 0.08), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_mean={0} % s{1}'.format(latex_float(slope_mean), inverse_str),
        #                          xy=(0.98, 0.04), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #
        #         if not np.isnan(duration_seconds):
        #             plt.annotate('', xy=(duration_start_time, 0), xycoords='data',
        #                          xytext=(duration_end_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='dodgerblue', edgecolor='dodgerblue', linewidth=5, arrowstyle='<->'))
        #             mid_time = duration_start_time + (duration_end_time - duration_start_time) / 2
        #             plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18, color='dodgerblue')
        #
        #         summary_path = output_path + 'Summary Plots/'
        #         if not os.path.exists(summary_path):
        #             os.makedirs(summary_path)
        #         summary_filename = '{0} Event {1} {2} Parameter Summary.png'.format(summary_path, flare_index, column)
        #         plt.savefig(summary_filename)
        #         if verbose:
        #             logger.info("Summary plot saved to %s" % summary_filename)
        #
        # # Write to the JEDI catalog on disk
        # jedi_row.to_csv(csv_filename, header=False, index=False, mode='a')
        # if verbose:
        #     logger.info('Event {0} JEDI row written to {1}.'.format(flare_index, csv_filename))

        # Update progress bar
        progress_bar.update(flare_index)

    progress_bar.finish()
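
Distilled from the commented-out block above, the per-line parameterization chain is: depth first, then slope bounded between the GOES peak time and the depth time, then duration. The following is a minimal sketch of that chain, assuming determine_dimming_depth and determine_dimming_slope exist with the keyword arguments shown in the comments (only determine_dimming_duration is defined below, in Example #7); parameterize_one_line and its arguments are illustrative names, not part of the original code.

import numpy as np
import pandas as pd

def parameterize_one_line(eve_line_event, flare_peak_time, verbose=False, logger=None):
    # Depth is measured first; its time bounds the slope search
    depth_percent, depth_time = determine_dimming_depth(eve_line_event,
                                                        verbose=verbose, logger=logger)
    slope_min = slope_max = slope_mean = np.nan
    duration_seconds = duration_start_time = duration_end_time = np.nan
    slope_start_time = pd.Timestamp(flare_peak_time.iso)  # GOES peak marks the slope start
    if not (pd.isnull(slope_start_time) or pd.isnull(depth_time)):
        slope_min, slope_max, slope_mean = determine_dimming_slope(
            eve_line_event,
            earliest_allowed_time=slope_start_time,
            latest_allowed_time=depth_time,
            verbose=verbose, logger=logger)
        # Duration is only attempted once valid slope bounds exist, matching the commented code
        duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(
            eve_line_event,
            earliest_allowed_time=slope_start_time,
            verbose=verbose, logger=logger)
    return (depth_percent, depth_time, slope_min, slope_max, slope_mean,
            duration_seconds, duration_start_time, duration_end_time)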
Example #7
0
def determine_dimming_duration(light_curve_df,
                               earliest_allowed_time=None, smooth_points=0,
                               plot_path_filename=None, verbose=False, logger=None):
    """Find the duration of dimming in a light curve, if any.
    Assumes light curve is normalized such that pre-flare = 0%.

    Inputs:
        light_curve_df [pd DataFrame]:    A pandas DataFrame with a DatetimeIndex and a column for irradiance.

    Optional Inputs:
        earliest_allowed_time [metatime]: The function won't return a duration if the only 0 crossings are earlier than this.
                                          Default is None, meaning the beginning of the light_curve_df.
        smooth_points [integer]:          Used to apply a rolling mean with the number of points (indices) specified.
                                          Default is 0, meaning no smoothing will be performed.
        plot_path_filename [str]:         Set to a path and filename in order to save the summary plot to disk.
                                          Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                   Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:               A configured logger from jpm_logger.py. If set to None, will generate a
                                          new one. Default is None.

    Outputs:
        duration_seconds [integer]:         The duration of dimming in seconds.
        duration_start_time [pd.Timestamp]: The time the duration starts (downward 0 crossing).
        duration_end_time [pd.Timestamp]:   The time the duration ends (upward 0 crossing).

    Optional Outputs:
        None

    Example:
        duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(light_curve_df,
                                                                                              plot_path_filename='./bla.png',
                                                                                              verbose=True)
    """

    # If no earliest_allowed_time set, then set it to beginning of light_curve_df
    if earliest_allowed_time is None:
        earliest_allowed_time = pd.Timestamp(light_curve_df.index.values[0])

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_dimming_duration_log', path='/Users/jmason86/Desktop/')
        logger.info("Running on event with light curve start time of {0}.".format(light_curve_df.index[0]))

    # Set up a successful processing flag
    found_duration = True

    # Optionally smooth the light curve with a rolling mean
    if smooth_points:
        light_curve_df['smooth'] = light_curve_df['irradiance'].rolling(smooth_points, center=True).mean()
    else:
        light_curve_df['smooth'] = light_curve_df['irradiance']

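    # Backfill NaNs with the first valid smoothed value so the sign-change test below is defined everywhere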
    first_non_nan = light_curve_df['smooth'].first_valid_index()
    nan_indices = np.isnan(light_curve_df['smooth'])
    light_curve_df.loc[nan_indices, 'smooth'] = light_curve_df['smooth'][first_non_nan]

    # Find the indices where the light curve is closest to 0
    zero_crossing_indices = np.where(np.diff(np.signbit(light_curve_df['smooth'])))[0]
    zero_crossing_times = light_curve_df.index[zero_crossing_indices]

    # Discard any indices prior to the user-provided earliest_allowed_time, else cannot compute
    keep_indices = zero_crossing_times > earliest_allowed_time
    zero_crossing_indices = zero_crossing_indices[keep_indices]
    zero_crossing_times = zero_crossing_times[keep_indices]
    if zero_crossing_indices.size == 0:
        if verbose:
            logger.warning('No zero crossings detected after earliest allowed time of %s' % earliest_allowed_time)
        found_duration = False

    # Figure out which way the light curve is sloping
    if found_duration:
        light_curve_df['diff'] = light_curve_df['smooth'].diff()

    # Find the first negative slope zero crossing time
    if found_duration:
        neg_zero_crossing_indices = np.where(light_curve_df['diff'].iloc[zero_crossing_indices + 1] < 0)[0]
        if len(neg_zero_crossing_indices) > 0:
            first_neg_zero_crossing_index = neg_zero_crossing_indices[0]
            first_neg_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_neg_zero_crossing_index]]
        else:
            if verbose:
                logger.warning('No negative slope 0-crossing found. Duration cannot be defined.')
            found_duration = False

    # Find the first positive slope zero crossing
    if found_duration:
        pos_zero_crossing_indices = np.where(light_curve_df['diff'].iloc[zero_crossing_indices + 1] > 0)[0]
        if len(pos_zero_crossing_indices) > 0:
            first_pos_zero_crossing_index = pos_zero_crossing_indices[0]
            first_pos_zero_crossing_time = light_curve_df.index[zero_crossing_indices[first_pos_zero_crossing_index]]
        else:
            if verbose:
                logger.warning('No positive slope 0-crossing found. Duration cannot be defined.')
            found_duration = False

    # If the first negative slope zero crossing isn't earlier than the positive one, return null
    if (found_duration) and (first_neg_zero_crossing_time > first_pos_zero_crossing_time):
        if verbose:
            logger.warning('Dimming light curve may be misaligned in window. Negative slope 0-crossing detected after positive one.')
        found_duration = False

    # Return the time difference in seconds between the selected zero crossings
    if found_duration:
        duration_seconds = int((first_pos_zero_crossing_time - first_neg_zero_crossing_time).total_seconds())

    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates

        if found_duration:
            light_curve_df = light_curve_df.drop(columns='diff')

        ax = light_curve_df['irradiance'].plot()
        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        plt.ylabel('Irradiance [%]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        plt.title('Dimming Duration')

        if found_duration:
            plt.scatter([zero_crossing_times[first_neg_zero_crossing_index], zero_crossing_times[first_pos_zero_crossing_index]],
                        [light_curve_df['smooth'].iloc[zero_crossing_indices[first_neg_zero_crossing_index]],
                         light_curve_df['smooth'].iloc[zero_crossing_indices[first_pos_zero_crossing_index]]],
                        c='black', s=300, zorder=3)
            plt.annotate('', xy=(first_neg_zero_crossing_time, 0), xycoords='data',
                         xytext=(first_pos_zero_crossing_time, 0), textcoords='data',
                         arrowprops=dict(facecolor='black', linewidth=5, arrowstyle='<->'))
            mid_time = first_neg_zero_crossing_time + (first_pos_zero_crossing_time - first_neg_zero_crossing_time) / 2
            plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18)

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info("Summary plot saved to %s" % plot_path_filename)

    if not found_duration:
        duration_seconds = np.nan
        first_neg_zero_crossing_time = np.nan
        first_pos_zero_crossing_time = np.nan

    return duration_seconds, first_neg_zero_crossing_time, first_pos_zero_crossing_time
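
A quick sanity check of the zero-crossing logic in determine_dimming_duration: feed it a synthetic light curve that starts slightly positive, dips below zero, and recovers. This is a hedged sketch, not from the original repository; it only assumes the function above is in scope.

import numpy as np
import pandas as pd

# Synthetic 6-hour light curve at 1-minute cadence: starts at +0.5%, dips to -1.5%, recovers
times = pd.date_range('2012-04-15 18:00', periods=361, freq='1min')
t = np.linspace(0, 2 * np.pi, 361)
light_curve_df = pd.DataFrame(index=times, data={'irradiance': 0.5 - 2.0 * np.sin(t)})

duration_seconds, start_time, end_time = determine_dimming_duration(light_curve_df)
print(duration_seconds, start_time, end_time)  # ~9000 s between the downward and upward 0-crossings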
Example #8
0
def determine_preflare_irradiance(light_curve_df,
                                  estimated_time_of_peak_start,
                                  max_median_diff_threshold=1.5,
                                  std_threshold=1.0,
                                  plot_path_filename=None,
                                  verbose=False,
                                  logger=None):
    """Determine pre-flare irradiance level in a solar light curve.
    Or, more generally, find the pre-peak level in a time series.

    Inputs:
        light_curve_df [pd DataFrame]:           A pandas DataFrame with a DatetimeIndex and a column for irradiance.
        estimated_time_of_peak_start [metatime]: The estimated time that the dramatic increase starts.
                                                 This could come from, e.g., GOES/XRS.

    Optional Inputs:
        max_median_diff_threshold [float]: The maximum allowed difference in medians between the 3 pre-flare windows
                                           in percent terms. This value gets multiplied by the mean of the stds from
                                           each sub-window and is then compared to the max_median_diff. The default is 1.5.
        std_threshold [float]:             The maximum allowed standard deviation in the pre-flare windows in percent
                                           terms. The default is 1.0.
        plot_path_filename [str]:          Set to a path and filename in order to save the summary plot to disk.
                                           Default is None, meaning the plot will not be saved to disk.
        verbose [bool]:                    Set to log the processing messages to disk and console. Default is False.
        logger [JpmLogger]:                A configured logger from jpm_logger.py. If set to None, will generate a
                                           new one. Default is None.

    Outputs:
        preflare_irradiance [float]: The identified pre-flare irradiance level in the same units as light_curve_df.irradiance.

    Optional Outputs:
        None

    Example:
        preflare_irradiance = determine_preflare_irradiance(light_curve_df, pd.Timestamp('2012-04-15 17:52:20.0'),
                                                            plot_path_filename='./bla.png',
                                                            verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        if not logger:
            logger = JpmLogger(filename='determine_preflare_irradiance_log',
                               path='/Users/jmason86/Desktop/')
        logger.info("Running on event with peak start time of {0}.".format(
            estimated_time_of_peak_start))

    # Verify that not all values are nan
    if light_curve_df.isna().all().all():
        if verbose:
            logger.warning("All irradiance values are NaN. Returning.")
        return np.nan

    # Convert irradiance to percent if not already present
    # (the median is computed unconditionally because the summary plot needs it either way)
    median_irradiance = light_curve_df['irradiance'].median()
    if 'irradiance_percent' not in light_curve_df.columns:
        light_curve_df['irradiance_percent'] = (
            light_curve_df['irradiance'].values -
            median_irradiance) / median_irradiance * 100.
        if verbose:
            logger.info(
                "Converted irradiance to percent, baselining median in entire pre-flare window."
            )

    # Divide the pre-flare period into 3 equal-length windows
    windows = np.array_split(light_curve_df[:estimated_time_of_peak_start], 3)
    if verbose:
        logger.info("Divided pre-flare period into 3 equal-length windows.")

    # Compute median and standard deviation in each window
    medians = [
        windowed_df['irradiance_percent'].median() for windowed_df in windows
    ]
    medians_abs = [
        windowed_df['irradiance'].median() for windowed_df in windows
    ]
    stds = np.array(
        [windowed_df['irradiance_percent'].std() for windowed_df in windows])
    if verbose:
        logger.info("Computed medians and standard deviations in each window.")

    # Compute max difference between the medians
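    # (np.diff gives the window 1-2 and 2-3 pairs; the appended term adds the 1-3 pair)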
    max_median_diff = np.max(
        np.abs(np.append(np.diff(medians), medians[2] - medians[0])))

    # Compare medians and standard deviations in each window to thresholds
    failed_median_threshold = False
    failed_std_threshold = False
    if np.all(np.isnan(stds)):
        if verbose:
            logger.warning(
                'Cannot compute pre-flare irradiance. All standard deviations are nan.'
            )
        failed_std_threshold = True
    else:
        if max_median_diff > max_median_diff_threshold * np.mean(stds):
            if verbose:
                logger.warning(
                    'Cannot compute pre-flare irradiance. Maximum difference in window medians ({0}) exceeded threshold ({1}).'
                    .format(max_median_diff,
                            max_median_diff_threshold * np.mean(stds)))
            failed_median_threshold = True
        if (stds < std_threshold).sum() < 2:
            if verbose:
                logger.warning(
                    'Cannot compute pre-flare irradiance. Standard deviation in more than 1 window is larger than threshold ({0}).'
                    .format(std_threshold))
            failed_std_threshold = True

    # Compute pre-flare irradiance (mean of the medians in absolute units)
    if failed_median_threshold or failed_std_threshold:
        preflare_irradiance = np.nan
    else:
        preflare_irradiance = np.mean(medians_abs)  # mean of the absolute-unit window medians computed above
        if verbose:
            logger.info("Computed pre-flare irradiance: {0}".format(
                preflare_irradiance))

    # Produce summary plot
    if plot_path_filename:
        plt.style.use('jpm-transparent-light')
        from matplotlib import dates
        from matplotlib.patches import Rectangle

        light_curve_df = light_curve_df.drop(columns='irradiance_percent')
        ax = light_curve_df[:estimated_time_of_peak_start].plot(legend=False,
                                                                c='grey')
        #  plt.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, c='grey') #  using matplotlib instead of pandas
        #  ax = plt.gca()
        start_date = light_curve_df.index.values[0]
        start_date_string = pd.to_datetime(str(start_date))
        plt.title('Pre-flare Windows')
        plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        plt.ylabel('Irradiance [W m$^{-2}$]')
        fmtr = dates.DateFormatter("%H:%M:%S")
        ax.xaxis.set_major_formatter(fmtr)
        ax.xaxis.set_major_locator(dates.HourLocator())
        ax2 = ax.twinx()
        light_curve_df[:estimated_time_of_peak_start].plot(ax=ax2,
                                                           legend=False,
                                                           c='grey')
        #  ax2.plot(light_curve_df[:estimated_time_of_peak_start].irradiance, color='grey')
        vals = ax2.get_yticks()
        ax2.set_yticklabels([
            '{:3.2f}%'.format(
                (x - median_irradiance) / median_irradiance * 100)
            for x in vals
        ])

        # First window
        start = dates.date2num(light_curve_df.index[0])
        end = dates.date2num(windows[0].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='deepskyblue', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[0].index[0], windows[0].index[-1]],
                 [medians_abs[0], medians_abs[0]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_1$ = ' + latex_float(medians[0]) + '% \n' +
                r'$\sigma_1$ = ' + latex_float(stds[0]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')

        # Second window
        start = dates.date2num(windows[1].index[0])
        end = dates.date2num(windows[1].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='slateblue', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[1].index[0], windows[1].index[-1]],
                 [medians_abs[1], medians_abs[1]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_2$ = ' + latex_float(medians[1]) + '% \n' +
                r'$\sigma_2$ = ' + latex_float(stds[1]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')

        if not np.isnan(preflare_irradiance):
            ax.axes.axhline(y=preflare_irradiance,
                            linewidth=2,
                            color='tomato',
                            linestyle='dashed')
            ax.text(
                start + width / 2.0,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'pre-flare I = ' + latex_float(preflare_irradiance) +
                ' W m$^{-2}$',
                fontsize=11,
                ha='center',
                va='top',
                color='tomato')
        else:
            ax.text(
                start + width / 2.0,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'pre-flare I = N/A \n' + 'median condition ok: ' +
                str(not failed_median_threshold) + '\n' +
                r'$\sigma$ condition ok: ' + str(not failed_std_threshold),
                fontsize=11,
                ha='center',
                va='top',
                color='tomato')

        # Third window
        start = dates.date2num(windows[2].index[0])
        end = dates.date2num(windows[2].index[-1])
        width = end - start
        rect = Rectangle((start, 0), width, 1, color='violet', alpha=0.2)
        ax.add_patch(rect)
        plt.plot([windows[2].index[0], windows[2].index[-1]],
                 [medians_abs[2], medians_abs[2]],
                 linestyle='dashed',
                 c='dimgrey')
        ax.text(start + width / 2.0,
                np.min(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median$_3$ = ' + latex_float(medians[2]) + '% \n' +
                r'$\sigma_3$ = ' + latex_float(stds[2]) + '%',
                fontsize=11,
                ha='center',
                va='bottom')
        ax.text(end,
                np.max(
                    light_curve_df[:estimated_time_of_peak_start].irradiance),
                'median diff = ' + latex_float(max_median_diff) + '% \n' +
                r'thresh $\times \mu_{\sigma n}$ = ' +
                latex_float(max_median_diff_threshold * np.mean(stds)) + '%',
                fontsize=11,
                ha='right',
                va='top')

        # Increase border so y-axes don't get cut off in savefig, even though they don't in plt.show()
        plt.gcf().subplots_adjust(left=0.22)

        plt.savefig(plot_path_filename)
        if verbose:
            logger.info(
                "Summary plot for event with start time {0} saved to {1}".
                format(estimated_time_of_peak_start, plot_path_filename))

    return preflare_irradiance
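
Usage sketch for determine_preflare_irradiance on synthetic quiet-Sun data (assumed values, not from the original repository; it only assumes the function above is in scope). The three windows have nearly identical medians and tiny standard deviations, so both thresholds pass and the mean of the window medians is returned.

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
times = pd.date_range('2012-04-15 15:00', periods=180, freq='1min')
light_curve_df = pd.DataFrame(index=times,
                              data={'irradiance': 3.0e-4 + rng.normal(scale=1.0e-7, size=180)})

preflare_irradiance = determine_preflare_irradiance(light_curve_df,
                                                    pd.Timestamp('2012-04-15 17:52:20'))
print(preflare_irradiance)  # ~3.0e-4 W / m^2, the mean of the three window medians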
Example #9
0
def correlationCoefficientScan(
        output_path='/Users/tyleralbee/Desktop/StealthCME',
        eve_data_path='/Users/tyleralbee/Desktop/savesets/eve_selected_lines.csv',
        cme_signature='/Users/tyleralbee/Desktop/savesets/eve_lines_event_percents_fitted.csv',
        verbose=True):
    """Slide a CME-event-length window across the EVE line data, fit each windowed
    light curve, and log any window whose summed per-column correlation with the
    CME signature exceeds a threshold, as a search for stealth CMEs.
    """

    eve_lines = pd.read_csv(eve_data_path, index_col=0)
    eve_lines.index = pd.to_datetime(eve_lines.index)
    wholeDfLength = len(eve_lines)

    cme_event = pd.read_csv(cme_signature, index_col=0)
    cme_event.index = pd.to_datetime(cme_event.index)
    cmeEventLength = len(cme_event)

    if verbose:
        logger = JpmLogger(filename='do_correlation_coefficient_scan',
                           path=output_path,
                           console=True)
        logger.info("Starting Stealth CME search pipeline!")
    else:
        logger = None

    if verbose:
        logger.info('Loaded EVE and CME data')

    # Define the columns of the output catalog
    output_table = pd.DataFrame(columns=[
        'Event #', 'Start Time', 'End Time', 'Correlation Coefficient'
    ])
    csv_filename = output_path + 'cc_output_{0}.csv'.format(Time.now().iso)
    output_table.to_csv(csv_filename, header=True, index=False, mode='w')

    if verbose:
        logger.info('Created output table definition.')

    # Start a progress bar
    widgets = [
        progressbar.Percentage(),
        progressbar.Bar(),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]

    startRow = 0
    endRow = cmeEventLength
    numSlices = int(wholeDfLength / cmeEventLength)
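    # Each slice spans one CME-template length; the window then slides forward
    # 60 rows per iteration (1 hour at the 1-minute cadence noted at the end of the loop)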
    output_row = 1

    progress_bar_sliding_window = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('Correlation Coefficient Analysis ')
                 ] + widgets,
        max_value=numSlices).start()

    # ----------Loop through data set using a sliding time window-------------------------------------------------------

    for i in range(1, numSlices):

        # ----------Clip dataset to time slice window-------------------------------------------------------------------

        event_time_slice = eve_lines.iloc[startRow:endRow]

        # ---------Convert irradiance values to percentages-------------------------------------------------------------

        preflare_irradiance = event_time_slice.iloc[0]
        event_time_slice_percentages = (event_time_slice - preflare_irradiance
                                        ) / preflare_irradiance * 100.0

        if verbose:
            logger.info(
                "Event {0} irradiance converted from absolute to percent units."
                .format(i))

        # ---------Fit light curves to reduce noise---------------------------------------------------------------------

        uncertainty = np.ones(len(event_time_slice_percentages)
                              ) * 0.002545  # got this line from James's code

        progress_bar_fitting = progressbar.ProgressBar(
            widgets=[progressbar.FormatLabel('Light curve fitting: ')] +
            widgets,
            max_value=len(event_time_slice_percentages.columns)).start()

        # Establish the fitted alias up front so it exists even if every column is skipped as NaN
        event_time_slice_fitted = event_time_slice_percentages
        for j, column in enumerate(event_time_slice_percentages):
            if event_time_slice_percentages[column].isnull().all().all():
                if verbose:
                    logger.info(
                        'Event {0} {1} fitting skipped because all irradiances are NaN.'
                        .format(j, column))
            else:
                eve_line_event_percentages = pd.DataFrame(
                    event_time_slice_percentages[column])
                eve_line_event_percentages.columns = ['irradiance']
                eve_line_event_percentages['uncertainty'] = uncertainty

                fitting_path = output_path + 'Fitting/'
                if not os.path.exists(fitting_path):
                    os.makedirs(fitting_path)

                plt.close('all')
                light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(
                    eve_line_event_percentages,
                    plots_save_path='{0} Event {1} {2} '.format(
                        fitting_path, j, column),
                    verbose=verbose,
                    logger=logger)
                event_time_slice_percentages[column] = light_curve_fit  # event_time_slice_fitted aliases this DataFrame

                if verbose:
                    logger.info('Event {0} {1} light curves fitted.'.format(
                        j, column))
                progress_bar_fitting.update(j)

        progress_bar_fitting.finish()

        if verbose:
            logger.info("Event {0} Light curves fitted".format(i))

        # ---------Compute Correlation Coefficients---------------------------------------------------------------------

        totalCorrelationCoefficient = 0.0
        ds1 = event_time_slice_fitted
        ds2 = cme_event

        # Gather stats for correlation
        for column in ds1:
            dsColumn1 = ds1[column]
            dsColumn2 = ds2[column]

            dsColumn1.reset_index(
                drop=True, inplace=True)  # prevent NaNs from appearing in join
            dsColumn2.reset_index(
                drop=True, inplace=True)  # prevent NaNs from appearing in join

            # TODO: assert that both columns have same count?
            n = int(dsColumn1.count())
            meanA = float(dsColumn1.mean())
            meanB = float(dsColumn2.mean())
            stdA = float(dsColumn1.std(ddof=0))
            stdB = float(dsColumn2.std(ddof=0))

            # Generate correlation output
            dsJoined = pd.DataFrame({
                'a': dsColumn1,
                'b': dsColumn2
            })  # Avoids ambiguity when attr names are the same
            numerator = 0.0  # Stores summation of (a_i - meanA)(b_i - meanB)
            denominator = n * stdA * stdB

            for index, row in dsJoined.iterrows():
                a = row['a']
                b = row['b']
                numerator = numerator + (a - meanA) * (b - meanB)

            correlationCoefficient = numerator / denominator
            totalCorrelationCoefficient = totalCorrelationCoefficient + correlationCoefficient
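            # Note: this hand-rolled value equals Pearson's r; with matched indices,
            # dsColumn1.corr(dsColumn2) would give the same result, since the
            # n * (ddof=0) stds in this denominator equal the (n-1) * (ddof=1) stds exactly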

        # ---------Output Results---------------------------------------------------------------------------------------

        eventStartTime = event_time_slice.iloc[0].name
        eventEndTime = event_time_slice.iloc[-1].name

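        # The cutoff applies to the *sum* of per-column correlations, so with each
        # column's r capped at 1, at least five lines must correlate strongly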
        if not math.isnan(totalCorrelationCoefficient
                          ) and totalCorrelationCoefficient >= 4.2:
            output_table.loc[output_row] = [
                output_row, eventStartTime, eventEndTime,
                totalCorrelationCoefficient
            ]
            output_table.to_csv(csv_filename,
                                header=True,
                                index=False,
                                mode='w')
            output_row = output_row + 1

        startRow = startRow + 60  # advance time window by 1 hour
        endRow = endRow + 60  # advance time window by 1 hour
        progress_bar_sliding_window.update(i)  # advance progress bar