示例#1
0
def make_omegascan(ifo, t0, durs):
    """Helper function to create a single omegascan image, with
    multiple durations.

    Parameters
    ----------
    ifo : str
        'H1', 'L1', or 'V1'
    t0 : int or float
        Central time of the omegascan.
    durs : list of floats/ints
        Durations which will be scanned symmetrically about t0; one panel
        is drawn per duration.
        Example: [0.5, 2, 10]

    Returns
    -------
    bytes
        bytes of png of the omegascan. If the strain data cannot be read or
        transformed (IndexError, FloatingPointError or ValueError), the png
        is a placeholder image carrying a failure message instead.

    """
    # Explicitly use a non-interactive Matplotlib backend.
    plt.switch_backend('agg')

    # Collect data spanning the longest requested duration on both sides.
    longest = max(durs)
    long_start, long_end = t0 - longest, t0 + longest
    cache = create_cache(ifo, long_start, long_end)
    strain_name = app.conf['strain_channel_names'][ifo]
    try:
        ts = TimeSeries.read(cache, strain_name,
                             start=long_start, end=long_end).astype('float64')
        # Do q_transforms for the different durations
        qgrams = [ts.q_transform(
            frange=(20, 4096), gps=t0, outseg=(t0 - dur, t0 + dur), logf=True)
            for dur in durs]
    except (IndexError, FloatingPointError, ValueError):
        # data from cache can't be properly read, or data is weird
        fig = plt.figure()
        plt.axis("off")
        plt.text(0.1, 0.45, f"Failed to create {ifo} omegascan", fontsize=17)
    else:
        fig = Plot(*qgrams,
                   figsize=(10 * len(durs), 5),
                   geometry=(1, len(durs)),
                   yscale='log',
                   method='pcolormesh')
        for ax in fig.axes:
            fig.colorbar(ax=ax, label='Normalized energy', clim=(0, 30))
            ax.set_epoch(t0)
        fig.suptitle(f'Omegascans of {strain_name} at {t0}', fontweight="bold")

    try:
        with io.BytesIO() as outfile:
            fig.savefig(outfile, format='png', dpi=300)
            return outfile.getvalue()
    finally:
        # Release the figure: pyplot keeps a reference to every figure it
        # manages until it is explicitly closed, so repeated calls would
        # otherwise accumulate figures in memory.
        plt.close(fig)
示例#2
0
def plot_spectra(clusters,
                 channel,
                 unit='cts',
                 xlog=True,
                 legend=None,
                 xlim=None,
                 **kwargs):
    """Overlay the stored spectra of several clusters for one channel.

    For every entry of ``clusters`` the ``*.hdf5`` files in the current
    working directory are searched for a FrequencySeries stored under the
    name ``'{cluster}-{channel}'``; the first file containing it is used.
    All spectra are drawn on a single log-y axes and the figure is saved as
    ``'{channel}.png'``.

    :param clusters: iterable of cluster identifiers to plot.
    :param channel: channel name; also used as figure title and file stem.
    :param unit: unit string inserted into the y-axis label.
    :param xlog: use a logarithmic x-axis when true.
    :param legend: legend entries; defaults to ``clusters``.
    :param xlim: optional x-axis limits passed to ``set_xlim``.
    :param kwargs: forwarded to ``Plot``.
    :raises KeyError: if no ``*.hdf5`` file contains the spectrum of a cluster.
    """
    from glob import glob
    from gwpy.frequencyseries import FrequencySeries
    from gwpy.plot import Plot

    title = channel
    psds = {}
    for cluster in clusters:
        for filename in glob('*.hdf5'):
            try:
                psds[cluster] = FrequencySeries.read(filename,
                                                     f'{cluster}-{channel}')
            except KeyError:
                # this file does not hold the dataset; try the next one.
                continue
            print(f'found in {filename}')
            break
        else:
            raise KeyError(f'Could not find Nº{cluster}')

    if legend is None:
        legend = clusters

    # plot the group in one figure.
    figure = Plot(*psds.values(),
                  separate=False,
                  sharex=True,
                  zorder=1,
                  **kwargs)
    try:
        axes = figure.gca()
        if xlim is not None:
            axes.set_xlim(xlim)
        axes.set_ylim((1e-48, 1e-37))
        if xlog:
            axes.set_xscale('log')
        axes.set_yscale('log')
        axes.set_ylabel(f'Power Spectral Density [{unit}^2/Hz]')
        figure.suptitle(title)
        figure.legend(legend, prop={'size': 15})

        # save to png.
        figure.save(f'{title}.png')
    finally:
        # release the figure so repeated calls do not accumulate open plots.
        figure.close()
示例#3
0
def plot_timeseries(*data, **kwargs):
    """Draw the given series on one axes and write the figure to disk.

    Positional arguments are the series to plot; each must expose ``name``
    and the first must expose ``unit`` (used for the y-axis label).

    Recognised keyword arguments (all others are ignored):

    :param title: figure title (default ``None``).
    :param ylim: optional pair ``(low, high)`` for the y-axis limits.
    :param fname: output path (default ``'TimeSeries.png'``).
    """
    figure_title = kwargs.pop('title', None)
    y_range = kwargs.pop('ylim', None)
    out_path = kwargs.pop('fname', 'TimeSeries.png')

    plot = Plot(figsize=(15, 10))
    axis = plot.gca()
    axis.plot(*data)

    # one legend entry per series, taken from the series name.
    legend_entries = [text.to_string(series.name) for series in data]
    axis.legend(legend_entries, fontsize=20)
    axis.set_xscale('auto-gps')
    axis.set_ylabel(text.to_string(data[0].unit))

    # state bars for the two module-level DAQ segment lists.
    plot.add_state_segments(segments_daq_iy0_ok, label='IY0 DAQ State')
    plot.add_state_segments(segments_daq_ix1_ok, label='IX1 DAQ State')

    plot.suptitle(figure_title, fontsize=40)
    if y_range:
        axis.set_ylim(y_range[0], y_range[1])

    plot.savefig(out_path)
    plot.close()
示例#4
0
def representative_spectra(channels,
                           start,
                           stop,
                           rate,
                           label='kmeans-labels',
                           filename=DEFAULT_FILENAME,
                           prefix='.',
                           downloader=TimeSeriesDict.get,
                           cluster_numbers=None,
                           groups=None,
                           **kwargs):
    """
    Make representative spectra for each cluster based on the median psd for minutes in that cluster.
    Downloads only the raw minutes in the cluster to save.

    Steps, in order:

    1. Read the label TimeSeries named ``label`` from ``filename`` between
       ``start`` and ``stop``.
    2. Choose the clusters to process: ``cluster_numbers`` if given,
       otherwise every value in ``0..max(label)`` except the most populous.
    3. Split the labels into runs of constant value and, for each run whose
       value is a selected cluster, download the channels with
       ``downloader``, resample them to ``rate`` and write them into an HDF5
       cache file (skipped when ``data_exists`` reports the data present).
    4. For every selected cluster, load its PSDs from ``filename`` if they
       are stored there; otherwise compute the median PSD over all labelled
       times of that cluster and write the result to ``filename``.
    5. Plot every entry of ``groups`` for every cluster and save it as png.

    :param channels: list of channel names.
    :param start: start of the span to read labels for (passed to ``to_gps``).
    :param stop: end of the span to read labels for (passed to ``to_gps``).
    :param rate: target rate handed to ``better_aa_opts`` for resampling.
    :param label: name of the label TimeSeries stored in ``filename``.
    :param filename: HDF5 file holding the labels; PSDs are read from and
        written to it as well.
    :param prefix: output location passed to ``get_path``.
    :param downloader: callable ``(channels, start=..., end=...)`` returning
        a mapping of channel name to series.
    :param cluster_numbers: explicit clusters to process, or ``None``.
    :param groups: iterable of ``(group, lbls, title)`` triples, where
        ``group`` lists the channels drawn in one figure.
    :param kwargs: forwarded to ``Plot``.
    """
    # NOTE(review): the plotting loop below unpacks every element of
    # `groups` into (group, lbls, title); a plain list of channel names does
    # not look like it satisfies that -- confirm this default is usable.
    if groups is None:
        groups = channels

    # read the labels from the save file.
    labels = TimeSeries.read(filename,
                             label,
                             start=to_gps(start),
                             end=to_gps(stop))
    logger.info(f'Read labels {start} to {stop} from {filename}')

    if cluster_numbers is None:
        # consider every label value from 0 up to the largest one present.
        clusters = list(range(max(labels.value) + 1))

        cluster_counts = list(
            len(labels.value[labels.value == c]) for c in clusters)
        # drop the most populous cluster from the work list.
        largest_cluster = cluster_counts.index(max(cluster_counts))
        clusters.remove(largest_cluster)

        logger.info(
            f'Largest cluster found to be Nº{largest_cluster} ({100 * max(cluster_counts) // len(labels.value)}%). Doing {clusters}.'
        )
        # keep cluster_counts index-aligned with clusters: list.remove drops
        # the first occurrence of the maximum, the same position that
        # index() selected above.
        cluster_counts.remove(max(cluster_counts))
    else:
        clusters = cluster_numbers
        cluster_counts = list(
            len(labels.value[labels.value == c]) for c in clusters)

    # d is non-zero wherever two consecutive labels differ.
    t, v, d = labels.times, labels.value, diff(labels.value)

    # (start, end) time pairs of the runs of constant label, and the label
    # value of each run.
    pairs = list(
        zip([t[0]] + list(t[:-1][d != 0]),
            list(t[1:][d != 0]) + [t[-1]]))
    values = list(v[:-1][d != 0]) + [v[-1]]
    assert len(pairs) == len(values)  # need to include start-| and |-end
    # l|r l|r l|r l|r
    # l,r l,r l,r l,r
    # l r,l r,l r,l r # zip(start + l[1:], r[:-1] + stop)

    # debug output: the runs, their lengths, and their label values.
    print(pairs)
    for pair in pairs:
        print(int(pair[1].value) - int(pair[0].value))
    print(values)

    # use h5py to make a mutable object pointing to a file on disk.
    # `filename` is rebound here: from this point on it is the name returned
    # by path2h5file, not the argument the labels were read from.
    save_file, filename = path2h5file(
        get_path(f'spectra-cache {start}', 'hdf5', prefix=prefix))
    logger.debug(f'Initiated hdf5 stream to {filename}')

    logger.info(f'Patching {filename}...')
    for i, (dl_start, end) in enumerate(pairs):
        if values[i] in clusters:
            # NOTE(review): existence is checked at the run's end time while
            # the data is written under its start time -- confirm against
            # data_exists / write_to_disk.
            if not data_exists(channels, to_gps(end).seconds, save_file):
                logger.debug(
                    f'Downloading Nº{values[i]} from {dl_start} to {end}...')
                try:
                    # pad the request by 60 before the run and 1 second
                    # after it (the 60 is presumably seconds -- confirm).
                    dl = downloader(channels,
                                    start=to_gps(dl_start) - LIGOTimeGPS(60),
                                    end=to_gps(end) + LIGOTimeGPS(seconds=1))
                    out = TimeSeriesDict()
                    for n in dl:
                        out[n] = dl[n].resample(**better_aa_opts(dl[n], rate))
                    write_to_disk(out, to_gps(dl_start).seconds, save_file)
                except RuntimeError:  # Cannot find all relevant data on any known server
                    # best effort: skip this run and carry on with the rest.
                    logger.warning(
                        f"SKIPPING Nº{values[i]} from {dl_start} to {end} !!")

    logger.info('Reading data...')
    data = TimeSeriesDict.read(save_file, channels)

    logger.info('Starting PSD generation...')

    # frequency axis shared by every PSD, taken from the PSD of the final
    # 60-long window of the first channel.
    f = data[channels[0]].crop(
        start=to_gps(data[channels[0]].times[-1]) - LIGOTimeGPS(60),
        end=to_gps(data[channels[0]].times[-1])).psd().frequencies

    # `d` is rebound: from here on it is the span in seconds between the
    # second and the last label time, used to scale the progress bar.
    d = (to_gps(labels.times[-1]).seconds - to_gps(labels.times[1]).seconds)
    for i, cluster in enumerate(clusters):
        try:
            # reuse PSDs stored under '<cluster>-<channel>' when available.
            psds = {
                channel: FrequencySeries.read(filename, f'{cluster}-{channel}')
                for channel in channels
            }
            logger.info(f'Loaded Nº{cluster}.')

        except KeyError:

            logger.info(
                f'Doing Nº{cluster} ({100 * cluster_counts[i] / len(labels.value):.2f}% of data)...'
            )
            with Progress(f'psd Nº{cluster} ({i + 1}/{len(clusters)})',
                          len(channels) * d) as progress:
                # per channel: crop a 60-long window ending at every label
                # time that belongs to this cluster, take its PSD, and keep
                # the element-wise median across all of those PSDs.
                # progress(...) presumably invokes the crop with the given
                # keyword arguments and moves the bar to the position given
                # as its second argument -- confirm against Progress.
                psds = {
                    channel: FrequencySeries(median(stack([
                        progress(data[channel].crop,
                                 pc * d + (to_gps(time).seconds -
                                           to_gps(labels.times[1]).seconds),
                                 start=to_gps(time) - LIGOTimeGPS(60),
                                 end=to_gps(time)).psd().value
                        for c, time in zip(labels.value, labels.times)
                        if c == cluster
                    ]),
                                                    axis=0),
                                             frequencies=f,
                                             name=f'{cluster}-{channel}')
                    for pc, channel in enumerate(channels)
                }
            # persist the freshly computed PSDs so a later run can load them.
            for name in psds.keys():
                psds[name].write(filename, **writing_opts)

        # plotting is slow, so show a nice progress bar.
        logger.debug('Initiating plotting routine...')
        with Progress('plotting', len(groups)) as progress:

            for p, (group, lbls, title) in enumerate(groups):
                # plot the group in one figure.
                plt = Plot(*(psds[channel] for channel in group),
                           separate=False,
                           sharex=True,
                           zorder=1,
                           **kwargs)
                # plt.gca().set_xlim((30,60))
                # modify the figure as a whole.
                # plt.add_segments_bar(dq, label='')
                plt.gca().set_xscale('log')
                plt.gca().set_yscale('log')
                plt.suptitle(title)
                plt.legend(lbls)

                # save to png.
                progress(
                    plt.save, p,
                    get_path(f'{cluster}-{title}',
                             'png',
                             prefix=f'{prefix}/{cluster}'))
示例#5
0
def cluster_plotter(channels,
                    start,
                    stop,
                    prefix='.',
                    label='kmeans-labels',
                    groups=None,
                    filename=DEFAULT_FILENAME,
                    dqflag='L1:DMT-ANALYSIS_READY:1',
                    xscale=None,
                    unit=None,
                    progressbar=True,
                    **kwargs):
    """
    Plots data with clusters labeled by color in the working directory, or a relative path given by prefix.
    Requires a .hdf5 file produced with a clustering function defined in this module to be in the working directory.
    **kwargs are forwarded to the plot constructor; when none are given,
    ``color='k'`` and ``alpha=0.3`` are used.

    :param channels: channel names to read from filename.
    :param start: start of the span to plot (passed to ``to_gps``).
    :param stop: end of the span to plot (passed to ``to_gps``).
    :param prefix: relative path to output images.
    :param label: name attribute of labels TimeSeries saved in filename.
    :param groups: groups of channels to plot in the same figure, as
        ``(channels, axis_labels, title)`` triples. See the example. When
        omitted, every channel is plotted in a figure of its own, labelled
        and titled with the channel name.
    :param filename: HDF5 file holding the channels and the labels.
    :param dqflag: data quality flag for segments bar.
    :param xscale: gps x-axis scale to use.
    :param unit: override y-axis unit.
    :param progressbar: show progress bar.

    >>> from gwpy.time import tconvert, from_gps
    >>> from datetime import timedelta
    >>> from cluster import cluster_plotter
    >>>
    >>> channels = ['L1:ISI-GND_STS_ETMX_Z_BLRMS_1_3.mean,m-trend', 'L1:ISI-GND_STS_ETMY_Z_BLRMS_1_3.mean,m-trend']
    >>> groups = [[channels, ('ETMX', 'ETMY'), 'L1:ISI-GND_STS_BLRMS_1_3 Z-axis']] # plot on the same figure.
    >>>
    >>> stop = from_gps(60 * (int(tconvert('now')) // 60)) # gets nearest minute to now
    >>> start = stop - timedelta(days=1)  # cluster the past day
    >>> cluster_plotter(channels, start, stop, filename='my_kmeans.hdf5', groups=groups)

    """

    # some defaults.
    if not kwargs:
        kwargs['color'] = 'k'
        kwargs['alpha'] = 0.3
    if groups is None:
        # a bare channel name cannot be unpacked into (group, labels, title),
        # so build one single-channel group per channel.
        groups = [([channel], [channel], channel) for channel in channels]
    else:
        # materialise so the number of figures is known for the progress bar.
        groups = list(groups)

    # read the data from the save file.
    data = TimeSeriesDict.read(filename,
                               list(channels) + [label],
                               start=to_gps(start),
                               end=to_gps(stop))
    logger.info(f'Read {start} to {stop} from {filename}')

    # get segments for the duration specified. Note that this may require doing `ligo-proxy-init -p`.
    logger.debug(f'Getting segments for {dqflag} from {start} to {stop}...')
    dq = DataQualityFlag.query(dqflag, to_gps(start), to_gps(stop))
    logger.info(f'Got segments for {dqflag} from {start} to {stop}.')

    # the per-sample colours depend only on the labels, so build them once
    # instead of once per axes.
    point_colors = [colors[j] for j in data[label]]

    # plotting is slow, so show a nice progress bar.
    # one progress step is reported per group, so size the bar by groups.
    logger.debug('Initiating plotting routine...')
    with Progress('plotting', len(groups),
                  quiet=not progressbar) as progress:

        for p, (group, labels, title) in enumerate(groups):

            # plot the group in one figure.
            figure = Plot(*(data[channel] for channel in group),
                          separate=True,
                          sharex=True,
                          zorder=1,
                          **kwargs)
            try:
                # modify the axes one by one.
                for i, ax in enumerate(figure.get_axes()):
                    series = data[group[i]]

                    # namely, add a colored overlay that indicates
                    # clustering labels. 'none' is the documented value for
                    # marker edges that should not be drawn.
                    ax.scatter(series.times,
                               series.value,
                               c=point_colors,
                               edgecolors='none',
                               s=4,
                               zorder=2)

                    ax.set_ylabel(
                        f'{labels[i]} {series.unit if unit is None else unit}'
                    )
                    setp(ax.get_xticklabels(), visible=False)

                # modify the figure as a whole.
                figure.add_segments_bar(dq, label='')
                if xscale is not None:
                    figure.gca().set_xscale(xscale)
                figure.suptitle(title)

                # save to png.
                progress(figure.save, p, get_path(title, 'png', prefix=prefix))
            finally:
                # release the figure so a long list of groups does not keep
                # every plot open at once.
                figure.close()

    logger.info(f'Completed plotting for {start} to {stop} from {filename}')