# Imports assumed by this excerpt (not shown in the original snippet):
import numpy
from glue import datafind
from gwpy.timeseries import TimeSeries


def dump_calibrated_data(fname):
    data = numpy.load(fname)

    # Figure out the times covered by the file from the filename
    # I should start using HDF5 so I can store metadata
    temp = fname.split('.')[0]
    temp = temp.split('-')
    ifo = temp[0]
    st, dur = int(temp[-2]), int(temp[-1])
    et = st + dur
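    # e.g. a file named "H1-vcodata-1126259446-4096.npy" (hypothetical) would give
    # ifo='H1', st=1126259446, dur=4096, et=1126263542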

    maxidx = len(data)
    width = 45

    weights = 1. - ((numpy.arange(-width, width) / float(width))**2)
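    # (a Welch-style parabolic taper: weight 1 at the window centre, falling toward 0 at the +/-width edges)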

    # The VCO frequencies are integers so we could dither them
    # to avoid quantization error if we wanted to be fancy
    # but it seems to make no difference
    if False:
        from numpy.random import triangular
        data[:, 1] += triangular(-1., 0., 1., size=len(data))

    # Just fit the whole thing at once, to get a single coefficient
    a, b = numpy.polyfit(data[:, 0], data[:, 1], 1)
    print "%.1f %u" % (a, b)

    # Slide through the data fitting PSL to IMC for data around each sample
    coeffs = []
    for idx in range(maxidx):
        idx1 = max(0, idx - width)
        idx2 = min(idx + width, maxidx)
        coeffs.append(numpy.polyfit(data[idx1:idx2, 0], data[idx1:idx2, 1], 1,
                                    w=weights[idx1 - idx + width:idx2 - idx + width]))
    coeffs = numpy.array(coeffs)
    times = numpy.arange(len(coeffs)) + 0.5
    connection = datafind.GWDataFindHTTPConnection()
    cache = connection.find_frame_urls(
        ifo[0], '%s_R' % ifo, st, et, urltype='file')

    imc = TimeSeries.read(cache, "%s:IMC-F_OUT_DQ" % ifo, st, et)
    imc = imc[::16384 // 256]
    print(imc)
    samp_times = numpy.arange(len(imc)) / 256.

    coeffs0 = numpy.interp(samp_times, times, coeffs[:, 0])
    coeffs1 = numpy.interp(samp_times, times, coeffs[:, 1]) - 7.6e7

    vco_interp = coeffs0 * imc.data + coeffs1
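    # (the line above is the VCO prediction from the locally fitted linear model,
    #  a(t)*IMC + b(t), with the fixed 7.6e7 Hz offset already removed from the intercept)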

    chan = "%s:IMC-VCO_PREDICTION" % (ifo,)
    vco_data = TimeSeries(vco_interp, epoch=st,
                          sample_rate=imc.sample_rate.value,
                          name=chan, channel=chan)
    vco_data.write("%s-vcoprediction-%u-%u.hdf" % (ifo, st, dur), format='hdf')
Example #2
    def test_read_frame_file(self):
        start_time = 0
        end_time = 10
        channel = "H1:GDS-CALIB_STRAIN"
        N = 100
        times = np.linspace(start_time, end_time, N)
        data = np.random.normal(0, 1, N)
        ts = TimeSeries(data=data, times=times, t0=0)
        ts.channel = Channel(channel)
        ts.name = channel
        filename = os.path.join(self.outdir, "test.gwf")
        ts.write(filename, format="gwf")

        # Check reading without time limits
        strain = gwutils.read_frame_file(
            filename, start_time=None, end_time=None, channel=channel
        )
        self.assertEqual(strain.channel.name, channel)
        self.assertTrue(np.all(strain.value == data[:-1]))

        # Check reading with time limits
        start_cut = 2
        end_cut = 8
        strain = gwutils.read_frame_file(
            filename, start_time=start_cut, end_time=end_cut, channel=channel
        )
        idxs = (times > start_cut) & (times < end_cut)
        # Dropping the last element - for some reason gwpy drops the last element when reading in data
        self.assertTrue(np.all(strain.value == data[idxs][:-1]))

        # Check reading with unknown channels
        strain = gwutils.read_frame_file(filename, start_time=None, end_time=None)
        self.assertTrue(np.all(strain.value == data[:-1]))

        # Check reading with incorrect channel
        strain = gwutils.read_frame_file(
            filename, start_time=None, end_time=None, channel="WRONG"
        )
        self.assertTrue(np.all(strain.value == data[:-1]))

        ts = TimeSeries(data=data, times=times, t0=0)
        ts.name = "NOT-A-KNOWN-CHANNEL"
        ts.write(filename, format="gwf")
        strain = gwutils.read_frame_file(filename, start_time=None, end_time=None)
        self.assertEqual(strain, None)
Example #3
def on_key(event):
    global freq_out
    # early exit for standard shortcut keys

    if event.key in 'ophxysq':
        return

    global nfft, navg, cid_click, state
    print('you pressed', event.key)
    '''
    if state in (1,2,3):
        if event.key in '=+' and points[state].order < 5:
            points[state].order += 1
        if event.key in '-_' and points[state].order > 1:
            points[state].order -= 1
        points[state].draw()'''
    
    if event.key == ',':
        nfft = next_smaller(allowed_nfft, nfft)
        update_specgram()
    if event.key == '.':
        nfft = next_bigger(allowed_nfft, nfft)
        update_specgram()
    
    if event.key == '[' and navg > 1:
        navg -= 1
        update_specgram()
    if event.key == ']':
        navg += 1
        update_specgram()
    
    if event.key == 'd' and state:
        points[state].clear()
     
    if event.key in '01239':
        if cid_click is not None:
            fig.canvas.mpl_disconnect(cid_click)
            cid_click = None

        state = int(event.key)
        if state:
            cid_click = fig.canvas.mpl_connect('button_press_event', points[state].onclick)

    if event.key == 'm':
        if len(max_dots.get_xdata()):
            print('removing maxima')
            max_dots.set_data([], [])
        else:
            fmin = points[2].extrapolate(T)
            if fmin is None:
                fmin = np.full_like(T, -np.inf)
            fmax = points[3].extrapolate(T)
            if fmax is None:
                fmax = np.full_like(T, np.inf)
            
            if not np.all(fmax > fmin):
                print('line 3 must be above line 2')
                return
            
            ZZ = Z.copy()
            for itime in range(len(T)):
                ZZ[:, itime][(fmin[itime] > F) | (F > fmax[itime])] = -np.inf
            
            imx = ZZ.argmax(axis=0)
            fpeak = F[imx]
            fpeak[~np.all(np.isfinite(Z), axis=0)] = np.nan
            max_dots.set_data(T, fpeak)
    if event.key == 'M':
        if not len(max_dots.get_xdata()):
            print('No maxima to save')
            return

        # recover data from plot
        tmax, fpeak = max_dots.get_data()
        # FIXME: shift by half delta t? specgram gives the middle of the bin, while the frame convention is the beginning of the bin
        freq_out = TimeSeries(fpeak, unit='Hz', dt=tmax[1]-tmax[0], t0=data.t0.value + tmax[0])
        fname = 'spectool_freq.h5'
        print('Saving frequency to file', fname)
        freq_out.write(fname, path='peak_freq')

    if event.key == 'b':
        if len(brms_dots.get_xdata()):
            brms_dots.set_data([], [])
        else:
            # hide existing time cut
            time_cut.set_data([], [])
            vline.set_xdata([np.nan])

            # calculate brms
            fmin = points[2].extrapolate(T)
            if fmin is None:
                fmin = np.full_like(T, -np.inf)
            fmax = points[3].extrapolate(T)
            if fmax is None:
                fmax = np.full_like(T, np.inf)

            if not np.all(fmax > fmin):
                print('line 3 must be above line 2')
                return
            SS = S.copy()
            for itime in range(len(T)):
                SS[:,itime][(fmin[itime] > F) | (F > fmax[itime])] = 0

            brms = SS.sum(axis=0)**0.5

            brms = np.log(brms)  # just for plotting

            brms_dots.set_data(T, brms)
            time_cut_ax.axis(spec_ax.get_xlim() + (np.nanmin(brms), np.nanmax(brms)))

    if event.key == 'B':
        if not len(brms_dots.get_xdata()):
            print('No brms to save')
            return

        # recover data from plot
        tbrms, brms = brms_dots.get_data()
        # FIXME: shift by half delta t? specgram gives the middle of the bin, while the frame convention is the beginning of the bin
        brms_out = TimeSeries(brms, unit=data.unit, dt=tbrms[1]-tbrms[0], t0=data.t0.value + tbrms[0])
        fname = 'spectool_brms.h5'
        print('Saving brms to file', fname)
        brms_out.write(fname, path='brms')

    if event.key == 'H':  # reset color axis
        print('Resetting color axis')
        reset_clim()

        
    update_title()
    plt.draw()
    print('state = %i, nfft = %i, order = %s, cid = %s' % (
        state, nfft, points[state].order if state in (1, 2, 3) else '-', cid_click))
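
# The helpers below are not part of the original snippet; this is a minimal
# sketch, assuming `allowed_nfft` is a sorted sequence of permitted FFT lengths,
# of the `next_smaller`/`next_bigger` functions used by on_key above.
def next_smaller(allowed, current):
    """Return the largest allowed value strictly below `current`, or `current`."""
    smaller = [n for n in allowed if n < current]
    return max(smaller) if smaller else current


def next_bigger(allowed, current):
    """Return the smallest allowed value strictly above `current`, or `current`."""
    bigger = [n for n in allowed if n > current]
    return min(bigger) if bigger else current
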
import sys
from numpy import *
from gwpy.timeseries import TimeSeries
from scipy.interpolate import interp1d

fname = sys.argv[1]
data = load(fname)

# Figure out the times covered by the file from the filename
# I should start using HDF5 so I can store metadata
temp = fname.split('.')[0]
temp = temp.split('-')
ifo = temp[0]
st, dur = int(temp[-2]), int(temp[-1])
et = st + dur

offset = 7.9e7
fvco = interp1d(arange(len(data)) + 0.5, data[:, 1] - offset, kind='cubic')

chan = "%s:IMC-VCO_INTERPOLATION" % (ifo,)
vco_data = TimeSeries(vco_interp, epoch=st, sample_rate=imc.sample_rate.value,
                      name=chan, channel=chan)
vco_data.write("%s-vcointerpolation-%u-%u.hdf" % (ifo, st, dur), format='hdf')
def get_magnetic_noise_matfiles(ifoparams, generalparams):
    from datetime import datetime, timedelta

    # number of days
    mdaystart = datetime.strptime(generalparams['mag_day_start'], '%Y%m%d')
    ndays = float(generalparams['ndays'])

    # load gaussian data
    gauss_fname = (generalparams['output_prefix'] + '/' + 'gaussian_frames/' +
                   '%s-GAUSSIAN-DATA-%d-DAYS.gwf')
    print('Loading Gaussian Data')
    gauss1 = MagTimeSeries.read(
        str(gauss_fname % (ifoparams['name'], float(ndays))),
        ifoparams['name'] + ':gauss_data')
    # loop over days
    print('Loading magnetic data')
    for day in range(int(ndays)):
        magfilename = (mdaystart + timedelta(day)).strftime('%Y%m%d')
        magfile1 = ifoparams['mag_directory'] + magfilename + '.mat'
        # construct mag time series
        if day == 0:
            magts1 = MagTimeSeries.from_coughlin_matfile(magfile1,
                                                         name='%s:mag_data' %
                                                         ifoparams['name'])
        else:
            magts1 = magts1.append(MagTimeSeries.from_coughlin_matfile(
                magfile1, name='%s:mag_data' % ifoparams['name']),
                                   inplace=False,
                                   gap='pad')
    # get and apply transfer functions to mag 1
    print('Applying transfer function to data 1')
    # do it on 1 hour timescales
    st = magts1.times[0].value
    for ii in range(int(24 * ndays)):
        print('\t\t Running hour %d' % (ii + 1))
        # crop to our time
        magfft1 = magts1.crop(st, st + 3600).do_unnormalized_fft()
        # get transfer function
        tf1 = powerlaw_transfer_function(
            float(ifoparams['kappa']) * 1e-23, float(ifoparams['beta']),
            magfft1.frequencies.value)
        # apply transfer function and append or initialize data
        if ii == 0:
            mag_gw_data1 = magfft1.apply_transfer_function(tf1).do_proper_ifft(
                magts1.epoch.value, name='%s:mag_data' % ifoparams['name'])
        else:
            mag_gw_data1_tmp = magfft1.apply_transfer_function(
                tf1).do_proper_ifft(magts1.epoch.value + 3600 * ii)
            mag_gw_data1 = mag_gw_data1.append(mag_gw_data1_tmp, inplace=False)
    # add some extra zeros if mag data is short
    # needed for POL magnetometers being 1 second short
    if mag_gw_data1.size != gauss1.size:
        # add a warning, as this could be something people don't want
        print("WARNING: WE ARE PADDING THE MAGNETIC DATA WITH ZEROS")
        mag_gw_data1 = mag_gw_data1.pad(
            (0, gauss1.size - mag_gw_data1.size)).copy()
        magts1 = magts1.pad((0, gauss1.size - magts1.size)).copy()

    # add magnetic and GW data together
    final_data_1 = TimeSeries(
        (mag_gw_data1.value + gauss1.value),
        sample_rate=gauss1.sample_rate,
        unit=gauss1.unit,
        name='%s:FAKE-CONTAMINATED' % ifoparams['name'],
        channel='%s:FAKE-CONTAMINATED' % ifoparams['name'],
        epoch=magts1.times[0].value)

    dur = ndays * 86400
    st = final_data_1.times[0].value

    # write raw magnetic data to a file
    rawmag_fname = generalparams[
        'output_prefix'] + '/correlated_mag_data/%s-RAW-MAG-DATA-%d-%d.gwf'
    magts1.write(str(rawmag_fname % (ifoparams['name'], st, dur)))

    # get total duration we've created
    combined_fname = generalparams[
        'output_prefix'] + '/contaminated_frames/%sFAKE-CONTAMINATED-%d-%d.gwf'
    mag_fname = generalparams[
        'output_prefix'] + '/correlated_mag_data/%sFAKE-MAG-%d-%d.gwf'
    final_data_1.write(str(combined_fname % (ifoparams['name'], st, dur)))
    mag_gw_data1.write(str(mag_fname % (ifoparams['name'], st, dur)))
    fd1_plot = final_data_1.asd(10).plot()
    ax = fd1_plot.gca()
    ax.plot(mag_gw_data1.asd(10))
    ax.plot(gauss1.asd(10))
    ax.set_xlim(1, 64)
    ax.set_xscale('linear')
    fd1_plot.add_legend()
    fd1_plot.savefig('test_plot')
# (the beginning of this excerpt is truncated; `imc` is presumably fetched from
#  NDS with a call like the one below, with `ifo`, `st`, `et`, `times` and
#  `coeffs` defined as in the other examples)
imc = TimeSeries.fetch("%s:IMC-F_OUT_DQ" % ifo,
                       st,
                       et,
                       host='nds2.ligo-wa.caltech.edu')
samp_times = arange(len(imc)) / float(imc.sample_rate.value)

coeffs0 = interp(samp_times, times, coeffs[:, 0])
coeffs1 = interp(samp_times, times, coeffs[:, 1]) - 7.6e7

vco_interp = coeffs0 * imc + coeffs1

chan = "%s:IMC-VCO_PREDICTION" % (ifo, )
vco_data = TimeSeries(vco_interp,
                      epoch=st,
                      sample_rate=imc.sample_rate.value,
                      name=chan,
                      channel=chan)
vco_data.write("%s-vcoprediction-%u-%u.hdf" % (ifo, st, dur), format='hdf')

if False:
    import pylab

    pylab.figure()
    pylab.scatter(times, coeffs[:, 0], c='r')
    pylab.plot(samp_times, coeffs0, c='g')

    pylab.figure()
    pylab.scatter(times, coeffs[:, 1], c='r')
    pylab.plot(samp_times, coeffs1, c='g')

    pylab.show()
# (excerpt truncated above: `data`, `maxidx`, `width`, `weights`, `ifo`, `st` and
#  `et` are presumably set up as in the dump_calibrated_data example)
a, b = polyfit(data[:, 0], data[:, 1], 1)
print("%.1f %u" % (a, b))

# Slide through the data fitting PSL to IMC for data around each sample
coeffs = []
for idx in range(maxidx):
    idx1 = max(0, idx - width)
    idx2 = min(idx + width, maxidx)
    coeffs.append(polyfit(data[idx1:idx2, 0], data[idx1:idx2, 1], 1,
                          w=weights[idx1 - idx + width:idx2 - idx + width]))
coeffs = array(coeffs)
times = arange(len(coeffs)) + 0.5
connection = datafind.GWDataFindHTTPConnection()
cache = connection.find_frame_urls(
    ifo[0], '%s_R' % ifo, st, et, urltype='file')

imc = TimeSeries.read(cache, "%s:IMC-F_OUT_DQ" % ifo, st, et)
imc = imc[::16384 // 256]
print(imc)
samp_times = arange(len(imc)) / 256.

coeffs0 = interp(samp_times, times, coeffs[:, 0])
coeffs1 = interp(samp_times, times, coeffs[:, 1]) - 7.6e7

vco_interp = coeffs0 * imc.data + coeffs1

chan = "%s:IMC-VCO_PREDICTION" % (ifo,)
vco_data = TimeSeries(vco_interp, epoch=st, sample_rate=imc.sample_rate.value,
                      name=chan, channel=chan)
vco_data.write("%s-vcoprediction-%u-%u.hdf" % (ifo, st, dur), format='hdf')
Example #8
segments = findfiles(start,
                     end,
                     chname,
                     prefix='/Users/miyo/Dropbox/KagraData/gif')
source = [path for files in segments for path in files]
x500_baro = TimeSeries.read(source=source,
                            name=chname,
                            format='gif',
                            pad=numpy.nan,
                            nproc=1)
x500_baro = TimeSeries(x500_baro.value,
                       times=x500_baro.times.value,
                       name=chname)
print(x500_baro.name)
x500_baro = x500_baro.resample(8.0)
x500_baro.write('2019May03_6hours_x500_press.gwf', format='gwf.lalframe')
#
chname = 'X2000_BARO'
segments = findfiles(start,
                     end,
                     chname,
                     prefix='/Users/miyo/Dropbox/KagraData/gif')
source = [path for files in segments for path in files]
x2000_baro = TimeSeries.read(source=source,
                             name=chname,
                             format='gif',
                             pad=numpy.nan,
                             nproc=1)
x2000_baro = TimeSeries(x2000_baro.value,
                        times=x2000_baro.times.value,
                        name=chname)
Example #9
from gwpy.timeseries import TimeSeries
from gwpy.time import tconvert, from_gps
import matplotlib.pyplot as plt
import astropy.units as u
import numpy as np

press = np.loadtxt('./data_jma/miyako_press.txt', dtype=np.float32)
# 05/31 2018 01:00 - 07/02 2019 00:00 (JST)
# 1211731218 - 1246028418
times = np.arange(1211731218, 1246028418 + 1, 3600) * u.s
press = TimeSeries(press, times=times, unit='hPa', name='JMA:MIYAKO-PRESS')
press.write('./data_jma/miyako_press.gwf')

toyama = TimeSeries.read('./data_jma/toyama_pressure.gwf',
                         'JMA:TOYAMA-PRESSURE')
takayama = TimeSeries.read('./data_jma/takayama_pressure.gwf',
                           'JMA:TAKAYAMA-PRESSURE')
inotani = TimeSeries.read('./data_jma/inotani_rain.gwf', 'JMA:INOTANI-RAIN')
kamioka = TimeSeries.read('./data_jma/kamioka_rain.gwf', 'JMA:KAMIOKA-RAIN')
shiomisaki = TimeSeries.read('./data_jma/shiomisaki_press.gwf',
                             'JMA:SHIOMISAKI-PRESS')
aomori = TimeSeries.read('./data_jma/aomori_press.gwf', 'JMA:AOMORI-PRESS')
tanegashima = TimeSeries.read('./data_jma/tanegashima_press.gwf',
                              'JMA:TANEGASHIMA-PRESS')
tsuruga = TimeSeries.read('./data_jma/tsuruga_press.gwf', 'JMA:TSURUGA-PRESS')
choshi = TimeSeries.read('./data_jma/choshi_press.gwf', 'JMA:CHOSHI-PRESS')
omaezaki = TimeSeries.read('./data_jma/omaezaki_press.gwf',
                           'JMA:OMAEZAKI-PRESS')
irago = TimeSeries.read('./data_jma/irago_press.gwf', 'JMA:IRAGO-PRESS')
miyako = TimeSeries.read('./data_jma/miyako_press.gwf', 'JMA:MIYAKO-PRESS')
Example #10
import sys
from numpy import *
from gwpy.timeseries import TimeSeries
from scipy.interpolate import interp1d

fname = sys.argv[1]
data = load(fname)

# Figure out the times covered by the file from the filename
# I should start using HDF5 so I can store metadata
temp = fname.split('.')[0]
temp = temp.split('-')
ifo = temp[0]
st, dur = int(temp[-2]), int(temp[-1])
et = st + dur

offset = 7.9e7
fvco = interp1d(arange(len(data)) + 0.5, data[:, 1] - offset, kind='cubic')

chan = "%s:IMC-VCO_INTERPOLATION" % (ifo, )
vco_data = TimeSeries(vco_interp,
                      epoch=st,
                      sample_rate=imc.sample_rate.value,
                      name=chan,
                      channel=chan)
vco_data.write("%s-vcointerpolation-%u-%u.hdf" % (ifo, st, dur), format='hdf')
Example #11
out = []

timer = LoopTimer(nchunk, 'demodulating')
for ichunk in range(nchunk):
    with getChannel(args.chan, args.start + ichunk * tchunk, tchunk) as data:
        chunk = data.data
        if args.f_demod:
            chunk = chunk * lo  # *= doesn't convert to complex
        out.append(decimator.calc(chunk))
    timer.end(ichunk)
out = np.concatenate(out)

out_name = 'demod_%i_%s-%i-%i.h5' % (args.f_demod, args.chan, args.start, dur)
logger.info('Writing results to %s', out_name)

path = '%s_demod_%i' % (args.chan, args.f_demod)
out = TimeSeries(out,
                 t0=args.start,
                 dt=1.0 / args.f_out,
                 channel=args.chan,
                 name=path)
with h5py.File(out_name, 'w') as out_file:

    # for chan in chans: for f_demod in f_demods:

    out.write(out_file, path=path)
    dataset = out_file[path]
    # storing attrs on the dataset itself breaks TimeSeries.read, use its parent group instead
    dataset.parent.attrs['f_demod'] = args.f_demod
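    # (hypothetical read-back: h5py.File(out_name, 'r')[path].parent.attrs['f_demod'])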

logger.info('Finished!')
Example #12
def compute_all(channels,
                start,
                stop,
                history=timedelta(hours=2),
                filename=DEFAULT_FILENAME,
                **kwargs):
    # set up duration (minute-trend data has dt=1min, so reject intervals not on the minute).
    duration = (stop - start).total_seconds() / 60
    assert (stop - start).total_seconds() / 60 == (stop -
                                                   start).total_seconds() // 60
    duration = int(duration)
    logger.info(
        f'Clustering data from {start} to {stop} ({duration} minutes).')

    # download data using TimeSeries.get(), including history of point at t0.
    logger.debug(
        f'Initiating download from {start} to {stop} with history={history}...'
    )
    dl = TimeSeriesDict.get(channels,
                            start=to_gps(start - history),
                            end=to_gps(stop))
    logger.info(f'Downloaded from {start} to {stop} with history={history}.')

    if exists('input.npy'):
        input_data = np.load('input.npy')
        logger.info('Loaded input matrix.')
    else:
        # generate input matrix of the form [sample1;...;sampleN] with sampleK = [feature1,...,featureN]
        # for sklearn.cluster algorithms. This is the slow part of the function, so a progress bar is shown.
        logger.debug(f'Initiating input matrix generation...')
        with Progress('building input', (duration * 60)) as progress:
            input_data = stack([
                concatenate([
                    progress(dl[channel].crop,
                             t,
                             start=to_gps(start + timedelta(seconds=t) -
                                          history),
                             end=to_gps(start + timedelta(seconds=t))).value
                    for channel in channels
                ]) for t in range(0, int(duration * 60), 60)
            ])

        # verify input matrix dimensions.
        assert input_data.shape == (duration,
                                    int(
                                        len(channels) *
                                        history.total_seconds() / 60))
        np.save('input.npy', input_data)
        logger.info('Completed input matrix generation.')

    params = {
        'quantile': .3,
        'eps': .3,
        'damping': .9,
        'preference': -200,
        'n_neighbors': 10,
        'n_clusters': 15,
        'min_samples': 20,
        'xi': 0.05,
        'min_cluster_size': 0.1
    }

    if exists('X.npy'):
        X = np.load('X.npy')
        logger.info('Loaded X')
    else:
        # normalize dataset for easier parameter selection
        X = StandardScaler().fit_transform(input_data)
        np.save('X.npy', X)
        logger.info('Generated X')

    if exists('bandwidth.npy'):
        bandwidth = np.load('bandwidth.npy')
        logger.info('Loaded bandwidth')
    else:
        # estimate bandwidth for mean shift
        bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])
        np.save('bandwidth.npy', bandwidth)
        logger.info('Generated bandwidth')

    if exists('connectivity.npy'):
        connectivity = np.load('connectivity.npy', allow_pickle=True)
        logger.info('Loaded connectivity')
    else:
        # connectivity matrix for structured Ward
        connectivity = kneighbors_graph(X,
                                        n_neighbors=params['n_neighbors'],
                                        include_self=False)
        # make connectivity symmetric
        connectivity = 0.5 * (connectivity + connectivity.T)
        np.save('connectivity.npy', connectivity)
        logger.info('Generated connectivity')

    ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    two_means = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
    ward = cluster.AgglomerativeClustering(n_clusters=params['n_clusters'],
                                           linkage='ward',
                                           connectivity=connectivity)
    spectral = cluster.SpectralClustering(n_clusters=params['n_clusters'],
                                          eigen_solver='arpack',
                                          affinity="nearest_neighbors")
    dbscan = cluster.DBSCAN(eps=params['eps'])
    optics = cluster.OPTICS(min_samples=params['min_samples'],
                            xi=params['xi'],
                            min_cluster_size=params['min_cluster_size'])
    affinity_propagation = cluster.AffinityPropagation(
        damping=params['damping'], preference=params['preference'])
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average",
        affinity="cityblock",
        n_clusters=params['n_clusters'],
        connectivity=connectivity)
    birch = cluster.Birch(n_clusters=params['n_clusters'])
    gmm = mixture.GaussianMixture(n_components=params['n_clusters'],
                                  covariance_type='full')

    clustering_algorithms = (
        ('MiniBatchKMeans', two_means),
        ('AffinityPropagation', affinity_propagation), ('MeanShift', ms),
        ('SpectralClustering', spectral), ('DBSCAN', dbscan),
        ('OPTICS', optics), ('Birch', birch), ('GaussianMixture', gmm)
        # ('Ward', ward),
        # ('AgglomerativeClustering', average_linkage),
    )

    for name, algorithm in clustering_algorithms:
        if exists(f'part-{name}-{filename}'):
            labels = TimeSeries.read(f'part-{name}-{filename}',
                                     f'{name}-labels')
            logger.debug(f'LOADED {name}.')
        else:
            logger.debug(f'doing {name}...')
            # catch warnings related to kneighbors_graph
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    message="the number of connected components of the " +
                    "connectivity matrix is [0-9]{1,2}" +
                    " > 1. Completing it to avoid stopping the tree early.",
                    category=UserWarning)
                warnings.filterwarnings(
                    "ignore",
                    message="Graph is not fully connected, spectral embedding"
                    + " may not work as expected.",
                    category=UserWarning)
                algorithm.fit(X)

            if hasattr(algorithm, 'labels_'):
                y_pred = algorithm.labels_.astype(int)
            else:
                y_pred = algorithm.predict(X)
            # cast the output labels to a TimeSeries so that cropping is easy later on.
            labels = TimeSeries(
                y_pred,
                times=dl[channels[0]].crop(start=to_gps(start),
                                           end=to_gps(stop)).times,
                name=f'{name}-labels')

            labels.write(f'part-{name}-{filename}')
        # put labels in data download dictionary for easy saving.
        dl[labels.name] = labels

    # write data download and labels to specified filename.
    cache_file = abspath(filename)
    if exists(cache_file):
        remove(cache_file)
    dl.write(cache_file)
    logger.info(f'Wrote cache to {filename}')
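
# Hypothetical invocation of compute_all (the channel name and dates below are placeholders):
#
#     from datetime import datetime, timedelta
#     compute_all(
#         channels=['H1:ISI-GND_STS_HAM2_X_BLRMS_30M_100M.mean,m-trend'],
#         start=datetime(2019, 1, 1, 0, 0),
#         stop=datetime(2019, 1, 2, 0, 0),
#         history=timedelta(hours=2),
#     )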
def dump_calibrated_data(fname):
    data = numpy.load(fname)

    # Figure out the times covered by the file from the filename
    # I should start using HDF5 so I can store metadata
    temp = fname.split('.')[0]
    temp = temp.split('-')
    ifo = temp[0]
    st, dur = int(temp[-2]), int(temp[-1])
    et = st + dur

    maxidx = len(data)
    width = 45

    weights = 1. - ((numpy.arange(-width, width) / float(width))**2)

    # The VCO frequencies are integers so we could dither them
    # to avoid quantization error if we wanted to be fancy
    # but it seems to make no difference
    if False:
        from numpy.random import triangular
        data[:, 1] += triangular(-1., 0., 1., size=len(data))

    # Just fit the whole thing at once, to get a single coefficient
    a, b = numpy.polyfit(data[:, 0], data[:, 1], 1)
    print "%.1f %u" % (a, b)

    # Slide through the data fitting PSL to IMC for data around each sample
    coeffs = []
    for idx in range(maxidx):
        idx1 = max(0, idx - width)
        idx2 = min(idx + width, maxidx)
        coeffs.append(
            numpy.polyfit(data[idx1:idx2, 0],
                          data[idx1:idx2, 1],
                          1,
                          w=weights[idx1 - idx + width:idx2 - idx + width]))
    coeffs = numpy.array(coeffs)
    times = numpy.arange(len(coeffs)) + 0.5
    connection = datafind.GWDataFindHTTPConnection()
    cache = connection.find_frame_urls(ifo[0],
                                       '%s_R' % ifo,
                                       st,
                                       et,
                                       urltype='file')

    imc = TimeSeries.read(cache, "%s:IMC-F_OUT_DQ" % ifo, st, et)
    imc = imc[::16384 // 256]
    print(imc)
    samp_times = numpy.arange(len(imc)) / 256.

    coeffs0 = numpy.interp(samp_times, times, coeffs[:, 0])
    coeffs1 = numpy.interp(samp_times, times, coeffs[:, 1]) - 7.6e7

    vco_interp = coeffs0 * imc.data + coeffs1

    chan = "%s:IMC-VCO_PREDICTION" % (ifo, )
    vco_data = TimeSeries(vco_interp,
                          epoch=st,
                          sample_rate=imc.sample_rate.value,
                          name=chan,
                          channel=chan)
    vco_data.write("%s-vcoprediction-%u-%u.hdf" % (ifo, st, dur), format='hdf')
Example #14
class TimeSeriesTests(unittest.TestCase):
    """`TestCase` for the timeseries module
    """
    framefile = os.path.join(
        os.path.split(__file__)[0], 'data', 'HLV-GW100916-968654552-1.gwf')
    tmpfile = '%s.%%s' % tempfile.mktemp(prefix='gwpy_test_')

    def setUp(self):
        random.seed(SEED)
        self.data = random.random(100)

    def test_creation(self):
        TimeSeries(self.data)

    def test_creation_with_metadata(self):
        self.ts = TimeSeries(self.data,
                             sample_rate=1,
                             name='TEST CASE',
                             epoch=0,
                             channel='TEST CASE')
        repr(self.ts)
        self.assertTrue(self.ts.epoch == GPS_EPOCH)
        self.assertTrue(self.ts.sample_rate == ONE_HZ)
        self.assertTrue(self.ts.dt == ONE_SECOND)

    def frame_read(self, format=None):
        ts = TimeSeries.read(self.framefile, 'L1:LDAS-STRAIN', format=format)
        self.assertTrue(ts.epoch == Time(968654552, format='gps', scale='utc'))
        self.assertTrue(ts.sample_rate == units.Quantity(16384, 'Hz'))
        self.assertTrue(ts.unit == units.Unit('strain'))

    def test_frame_read_lalframe(self):
        try:
            self.frame_read(format='lalframe')
        except ImportError as e:
            raise unittest.SkipTest(str(e))

    def test_frame_read_framecpp(self):
        try:
            self.frame_read(format='framecpp')
        except ImportError as e:
            raise unittest.SkipTest(str(e))

    def test_ascii_write(self, delete=True):
        self.ts = TimeSeries(self.data,
                             sample_rate=1,
                             name='TEST CASE',
                             epoch=0,
                             channel='TEST CASE')
        asciiout = self.tmpfile % 'txt'
        self.ts.write(asciiout)
        if delete and os.path.isfile(asciiout):
            os.remove(asciiout)
        return asciiout

    def test_ascii_read(self):
        fp = self.test_ascii_write(delete=False)
        try:
            ts = TimeSeries.read(fp)
        finally:
            if os.path.isfile(fp):
                os.remove(fp)

    def test_hdf5_write(self, delete=True):
        self.ts = TimeSeries(self.data,
                             sample_rate=1,
                             name='TEST CASE',
                             epoch=0,
                             channel='TEST CASE')
        hdfout = self.tmpfile % 'hdf'
        try:
            self.ts.write(hdfout)
        except ImportError as e:
            raise unittest.SkipTest(str(e))
        finally:
            if delete and os.path.isfile(hdfout):
                os.remove(hdfout)
        return hdfout

    def test_hdf5_read(self):
        try:
            hdfout = self.test_hdf5_write(delete=False)
        except ImportError as e:
            raise unittest.SkipTest(str(e))
        else:
            try:
                ts = TimeSeries.read(hdfout, 'TEST CASE')
            finally:
                if os.path.isfile(hdfout):
                    os.remove(hdfout)
# -----------
if False:
    chname = 'CALC_STRAIN'
    segments = findfiles(start,
                         end,
                         chname,
                         prefix='/Users/miyo/Dropbox/KagraData/gif')
    source = [path for files in segments for path in files]
    strain = TimeSeries.read(source=source,
                             name=chname,
                             format='gif',
                             pad=numpy.nan,
                             nproc=1)
    strain = TimeSeries(strain.value, times=strain.times.value, name=chname)
    strain = strain.resample(8.0)
    strain.write('Dec10_3hours_strain.gwf', format='gwf.lalframe')

# -----------
# Pressure
# -----------
if True:
    chname = 'X500_BARO'
    segments = findfiles(start,
                         end,
                         chname,
                         prefix='/Users/miyo/Dropbox/KagraData/gif')
    source = [path for files in segments for path in files]
    strain = TimeSeries.read(source=source,
                             name=chname,
                             format='gif',
                             pad=numpy.nan,