def generate_timeseries(data_list, setname="MagneticFields"): """ Generate time series using list of HDF5 data file paths Parameters ---------- data_list : dictionary Dictionary that stored the path to the HDF5 data file for each segment of data available. Returns ------- full_data : Array of segment's name """ full_data = TimeSeriesList() for seg in sorted(data_list): hfile = h5py.File(data_list[seg], "r") full_data.append(retrieve_data_timeseries(hfile, "MagneticFields")) hfile.close() return full_data
def getDataInRange(station, startTime, endTime, sortTime=True, convert=False, path='./', verbose=False): ''' Get list of data in time range station: str Name of station. startTime: float (unix time), str Earliest time. String formatted as 'yyyy-mm-dd-HH-MM-SS' (omitted values defaulted as 0) endTime: float (unix time), str Last time. Format same as startTime sortTime: bool (default: True) Actively sort output by start time (using data in file) convert: boolean (default: False) Whether to use conversion function from file. path: str (default './') Location of files verbose: bool (default False) Verbose output returns (data, sanity, fileList). Data and sanity are astropy TimeSeriesList Note: must evaluate values in 'sanity' (e.g., using 'value' attribute) to get boolean Note: use, e.g., dataTSL.join(pad=float('nan'),gap='pad') to combine TimeSeriesList into single Time series. ''' if (verbose): print 'getDataInRange() --- Finding files' fList = getFListInRange(station, startTime, endTime, path=path) numFiles = len(fList) # get data if (verbose): print 'getDataInRange() --- Reading files' dataList = [None] * numFiles saneList = [None] * numFiles for i in range(numFiles): dataList[i], saneList[i] = getDataFromFile(fList[i], convert=convert) # sort if needed if (sortTime): if (verbose): print 'getDataInRange() --- Sorting data' # do insertion sort (likely that list is sorted) sortIndex = range(numFiles) # sorted list of indices for sRange in range(1, numFiles): # sRange is size of sorted segment # note, sortIndex[sRange] = sRange insPtTime = dataList[sRange].epoch # for point being inserted insHere = sRange # place to insert point while (insHere > 0 and dataList[sortIndex[insHere - 1]].epoch > insPtTime): insHere -= 1 # decrement until finding place to insert # insert point dummy1 = sRange # point being moved while (insHere <= sRange): dummy2 = sortIndex[insHere] sortIndex[insHere] = dummy1 dummy1 = dummy2 insHere += 1 else: sortIndex = range(numFiles) # put data in TimeSeriesList dataTSL = TimeSeriesList() saneTSL = TimeSeriesList() for i in sortIndex: dataTSL.append(dataList[i]) saneTSL.append(saneList[i]) return dataTSL, saneTSL, [fList[i] for i in sortIndex]
def get_data(station,start_time,end_time,rep='/GNOMEDrive/gnome/serverdata/', resample=None,activity=False,unit='V',output='all',segtxt=False, channel='MagneticFields'): """ Glob all files withing user-defined period and extract data. Parameters ---------- station : str Name of the station to be analysed start_time : int GPS timestamp of the first required magnetic field data end_time : int GPS timestamp of the last required magnetic field data rep : str Data repository. Default is the GNOME server repository. resample : int New sampling rate activity : bool Output the activity of data unit : str Output unit format (V for voltage, pT for magnetic field) output : str Output data to be extracted. If output is equal to 'ts', only the time series will be given. Returns ------- ts_data : pycbc.types.TimeSeries Time series data for selected time period. ts_list : dictionary List of time series. activity : gwpy.segments.DataQualityDict List all the segment of data retrieved t0 : astropy.time.Time First timestamp t1 : astropy.time.Time Last timestamp """ if start_time==None or end_time==None: print "ERROR: No start or end date given..." quit() # Define data attribute to be extracted from HDF5 files setname = channel dstr = ['%Y','%m','%d','%H','%M','%S','%f'] dsplit = '-'.join(dstr[:start_time.count('-')+1]) start = datetime.strptime(start_time,dsplit) dsplit = '-'.join(dstr[:end_time.count('-')+1]) end = datetime.strptime(end_time,dsplit) dataset = [] for date in numpy.arange(start,end,timedelta(minutes=1)): date = date.astype(datetime) path1 = rep+station+'/'+date.strftime("%Y/%m/%d/") path2 = station+'_'+date.strftime("%Y%m%d_%H%M*.hdf5") fullpath = os.path.join(path1,path2) dataset += glob.glob(fullpath) if len(dataset)==0: print "ERROR: No data files were found..." quit() file_order,data_order = {},{} for fname in dataset: hfile = h5py.File(fname, "r") # Extract all atributes from the data attrs = hfile[setname].attrs # Define each attribute dstr, t0, t1 = attrs["Date"], attrs["t0"], attrs["t1"] # Construct GPS starting time from data start_utc = construct_utc_from_metadata(dstr, t0) # Construct GPS ending time from data end_utc = construct_utc_from_metadata(dstr, t1) # Represent the range of times in the semi-open interval segfile = segment(start_utc,end_utc) file_order[segfile] = fname data_order[segfile] = hfile # Create list of time series from every segment ts_list = TimeSeriesList() for seg in sorted(file_order): hfile = h5py.File(file_order[seg], "r") dset = hfile[setname] sample_rate = dset.attrs["SamplingRate(Hz)"] gps_epoch = construct_utc_from_metadata(dset.attrs["Date"], dset.attrs["t0"]) data = dset[:] if unit=='pT': data = eval(dset.attrs['MagFieldEq'].replace('MagneticFields','data').replace('[pT]','')) ts_data = TimeSeries(data, sample_rate=sample_rate, epoch=gps_epoch) ts_list.append(ts_data) hfile.close() # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data seglist = segmentlist(data_order.keys()) # Sort the segment list seglist.sort() # Initialise dictionary for segment information activity = DataQualityDict() if segtxt: # Save time span for each segment in ASCII file with open("segments.txt", "w") as fout: for seg in seglist: print >>fout, "%10.9f %10.9f" % seg # FIXME: Active should be masked from the sanity channel activity[station] = DataQualityFlag(station,active=seglist.coalesce(),known=seglist.coalesce()) # Generate an ASCII representation of the GPS timestamped segments of time covered by the input data seglist = segmentlist(data_order.keys()) # Sort the segment list seglist.sort() # Retrieve channel data for all the segments if unit=='V': full_data = numpy.hstack([data_order[seg][setname][:] for seg in seglist]) if unit=='pT': full_data = [] for seg in seglist: dset = data_order[seg][setname] data = dset[:] data = eval(dset.attrs['MagFieldEq'].replace('MagneticFields','data').replace('[pT]','')) full_data = numpy.hstack((full_data,data)) for v in data_order.values(): v.close() new_sample_rate = float(sample_rate) if resample==None else float(resample) new_data_length = len(full_data)*new_sample_rate/float(sample_rate) full_data = scipy.signal.resample(full_data,int(new_data_length)) # Models a time series consisting of uniformly sampled scalar values ts_data = types.TimeSeries(full_data,delta_t=1./new_sample_rate,epoch=seglist[0][0]) if output=='ts': return ts_data t0,t1 = time_convert(start_time,end_time) return ts_data,ts_list,activity,t0,t1