def rotate(i_df, site, date):
    """Rotate XYZ to HDZ for select sites, append to existing DataFrame
    and return.

    Parameters
    ----------
    i_df : DataFrame
        IMAGE magnetometer data loaded with image.load()
    site : List[str]
        List of sites to rotate
    date : str or datetime-like
        Date to load declination for

    Returns
    -------
    i_df : DataFrame
        DataFrame with HD magnetic field coordinates and station cgm
        coordinates, lshell and declination
    """
    dt = pd.to_datetime(date)
    for stn in site:
        stn_dat = utils.load_station_coor(param=stn, year=dt.year)
        # if the stn_dat can't be found don't rotate this station.
        # NOTE: previously this returned early, which silently skipped
        # every remaining station in the list as well.
        if stn_dat is None:
            continue
        dec = float(stn_dat['declination'])
        dec_rad = np.deg2rad(dec)
        # some of the IMAGE magnetometers have negative Z values.
        # Z should always be positive. These mags are likely measuring
        # variations which we can't calculate H and D for.
        if any(i_df[stn + '_Z'] < 0):
            i_df[stn + '_H'] = np.nan
            i_df[stn + '_D'] = np.nan
        else:
            x = i_df[stn + '_X'].astype(float)
            y = i_df[stn + '_Y'].astype(float)
            i_df[stn + '_H'] = x * np.cos(dec_rad) + y * np.sin(dec_rad)
            i_df[stn + '_D'] = y * np.cos(dec_rad) - x * np.sin(dec_rad)
        # fill in station coordinate info; this would be better as
        # metadata but that is not possible in a plain DataFrame
        i_df[stn + '_declination'] = dec
        i_df[stn + '_cgmlat'] = float(stn_dat['cgm_latitude'])
        i_df[stn + '_cgmlon'] = float(stn_dat['cgm_longitude'])
        i_df[stn + '_lshell'] = float(stn_dat['lshell'])
        i_df[stn + '_mlt'] = float(stn_dat['mlt_midnight'])
    return i_df
def rotate(i_df, site, date):
    """Rotate XYZ to HDZ for select sites, append to existing DataFrame
    and return.

    Parameters
    ----------
    i_df : DataFrame
        CARISMA magnetometer data loaded with image.load()
    site : List[str]
        List of sites to rotate
    date : str or datetime-like
        Date to load declination for

    Returns
    -------
    i_df : DataFrame
        DataFrame with HD magnetic field coordinates and station cgm
        coordinates, lshell and declination
    """
    dt = pd.to_datetime(date)
    # get a list of column names so stations without data can be skipped
    c_name = list(i_df.columns.values)
    for stn in site:
        stn = stn.upper()
        if stn + '_X' not in c_name:
            continue
        stn_dat = utils.load_station_coor(param=stn, year=dt.year)
        # if the stn_dat can't be found don't rotate this station.
        # NOTE: previously this returned early, which silently skipped
        # every remaining station in the list as well.
        if stn_dat is None:
            continue
        dec = float(stn_dat['declination'])
        dec_rad = np.deg2rad(dec)
        x = i_df[stn + '_X'].astype(float)
        y = i_df[stn + '_Y'].astype(float)
        i_df[stn + '_H'] = x * np.cos(dec_rad) + y * np.sin(dec_rad)
        i_df[stn + '_D'] = y * np.cos(dec_rad) - x * np.sin(dec_rad)
        # fill in station coordinate info; this would be better as
        # metadata but that is not possible in a plain DataFrame
        i_df[stn + '_declination'] = dec
        i_df[stn + '_cgmlat'] = float(stn_dat['cgm_latitude'])
        i_df[stn + '_cgmlon'] = float(stn_dat['cgm_longitude'])
        i_df[stn + '_lshell'] = float(stn_dat['lshell'])
        i_df[stn + '_mlt'] = float(stn_dat['mlt_midnight'])
    return i_df
def get_spec(site, sdate, edate=None, ndays=1, psd_dir=local_dir,
             fmin=0.80, fmax=15.01, add_omni: bool = False,
             omni_lags=[0], verbose: bool = False):
    """Returns a Pandas DataFrame containing the PSD spectrum, station
    name, and magnetic local time (MLT) of the station for a set of
    dates. Will loop through multiple stations if provided.

    Parameters
    ----------
    site : str
        String or array of strings with the station names which the
        spectra will be loaded
    sdate : str or datetime-like
        Initial day to be loaded
    edate : str or datetime-like, optional
        Final day to be loaded
    ndays : int, optional
        Number of days to load if edate is not defined, by default 1
    psd_dir : str, optional
        Directory of PSD files, by default local_dir
    fmin : float, optional
        Minimum frequency (mHz) to load, by default 0.80
    fmax : float, optional
        Maximum frequency (mHz) to load, by default 15.01
    add_omni : bool, optional
        Add hourly omni data to data frame, by default False
    omni_lags : list, optional
        Add lagged omni data; list is a list of hourly lags to include,
        by default [0], only include the current hours omni data. This
        is attached to each spectra or row in the data frame and so the
        returned DataFrame can get large quite quickly if looking at
        large lag times.
    verbose : bool, optional
        Print some simple messages, by default False

    Returns
    -------
    Pandas DataFrame
        DataFrame containing the spectrum, station, MLT,
        frequency resolution
    """
    if type(site) is str:
        site = [site.upper()]
    # get a list of days to loop over; build this first so the omni
    # window below is well defined even when edate is None
    if edate is not None:
        d_arr = pd.Series(pd.date_range(start=sdate, end=edate, freq='D'))
    else:
        d_arr = pd.Series(pd.date_range(start=sdate, periods=ndays,
                                        freq='D'))
    if add_omni:
        if type(omni_lags) is int:
            omni_lags = [omni_lags]
        elif type(omni_lags) is np.ndarray:
            omni_lags = omni_lags.tolist()
        # pad the omni window by the largest requested lag
        max_l = int(max(omni_lags) + 1)
        o_dat = omni.load(d_arr.iloc[0] - timedelta(hours=max_l),
                          d_arr.iloc[-1] + timedelta(hours=max_l))
        print('Adding omni data with lags:{0}'.format(omni_lags))
    # convert fmin and fmax to Hz to match the files' freq column
    fmin = fmin / 1000.
    fmax = fmax / 1000.
    df_r = pd.DataFrame()
    for stn in site:
        yr = -1
        for di, dt in d_arr.items():
            # hourly time axis for this day
            df_t = pd.Series(pd.date_range(dt, freq='H', periods=24))
            fn = '{0:04d}{1:02d}{2:02d}{3}_psd.txt.gz'.format(
                dt.year, dt.month, dt.day, stn.upper())
            fn = os.path.join(psd_dir, '{0:04d}'.format(dt.year),
                              '{0:02d}'.format(dt.month),
                              '{0:02d}'.format(dt.day), fn)
            # read in file, skip the day if the file is not found
            try:
                df_in = pd.read_csv(fn, compression='gzip', header=0)
            except FileNotFoundError:
                if verbose:
                    print("File not found {0}".format(fn))
                continue
            if verbose:
                print("File loaded {0}".format(fn))
            # keep only the requested band and drop metadata columns
            df_in = df_in[(df_in.freq >= fmin) & (df_in.freq <= fmax)]
            freq = df_in['freq'].copy() * 1000.
            df_in = df_in.drop(
                labels=['freq', 'station', 'decl', 'L',
                        'cgm_lat', 'cgm_lon'],
                axis=1)
            # get frequency resolution (mHz)
            f_res = freq.iloc[1] - freq.iloc[0]
            # transpose array (one row per hour) and drop new index
            # column; convert from nT^2/Hz to nT^2/mHz
            df_in = df_in.transpose().reset_index()
            df_in = df_in.drop(labels=['index'], axis=1)
            df_in = df_in.div(1000.)
            # add time, station and frequency resolution to the array
            df_in['t'] = df_t
            df_in['stn'] = stn.upper()
            df_in['f res mHz'] = f_res
            df_in = df_in.set_index('t')
            # station coordinates (and hence the MLT offset) only
            # change yearly, so reload them on a year change
            if dt.year != yr:
                if verbose:
                    print(stn, dt.year)
                stn_c = utils.load_station_coor(param=stn, year=dt.year)
                mlt = np.arange(0, 24)
                mlt = (mlt + 24 - float(stn_c['mlt_midnight'])) % 24.
            df_in['mlt'] = mlt
            df_r = df_r.append(df_in)
            yr = dt.year
    # attach (possibly lagged) omni data to the frame that is actually
    # returned; previously o_dat was joined into a throwaway per-day
    # frame after the loop and the result was discarded
    if add_omni and not df_r.empty:
        for lag in omni_lags:
            o_lag = o_dat.copy()
            if lag != 0:
                o_lag.index = o_lag.index + timedelta(hours=lag)
                o_lag.columns = o_dat.columns + '_{0}'.format(lag)
            df_r = df_r.join(o_lag, how='left')
    return df_r
def sum_psd(site, sdate, edate=None, ndays=1, psd_dir=local_dir,
            fmin=0.80, fmax=15.01, add_omni: bool = False,
            omni_lags=[0], verbose: bool = False):
    """Returns a Pandas DataFrame containing the integrated and summed
    PSD. Will loop through multiple stations if provided.

    Parameters
    ----------
    site : str
        String or array of strings with the station names which the
        spectra will be loaded
    sdate : str or datetime-like
        Initial day to be loaded
    edate : str or datetime-like, optional
        Final day to be loaded
    ndays : int, optional
        Number of days to load if edate is not defined, by default 1
    psd_dir : str, optional
        Directory of PSD files, by default local_dir
    fmin : float, optional
        Minimum frequency (mHz) to load, by default 0.80
    fmax : float, optional
        Maximum frequency (mHz) to load, by default 15.01
    add_omni : bool, optional
        Add hourly omni data to data frame, by default False
    omni_lags : list, optional
        Add lagged omni data; list is a list of hourly lags to include,
        by default [0], only include the current hours omni data. This
        is attached to each spectra or row in the data frame and so the
        returned DataFrame can get large quite quickly if looking at
        large lag times.
    verbose : bool, optional
        Print some simple messages, by default False

    Returns
    -------
    Pandas DataFrame
        Data frame with integrated and summed PSD between fmin and
        fmax, MLT, L-shell, station.
    """
    if type(site) is str:
        site = [site.upper()]
    # get a list of days to loop over; build this first so the omni
    # window below is well defined even when edate is None
    if edate is not None:
        d_arr = pd.Series(pd.date_range(start=sdate, end=edate, freq='D'))
    else:
        d_arr = pd.Series(pd.date_range(start=sdate, periods=ndays,
                                        freq='D'))
    if add_omni:
        if type(omni_lags) is int:
            omni_lags = [omni_lags]
        elif type(omni_lags) is np.ndarray:
            omni_lags = omni_lags.tolist()
        # pad the omni window by the largest requested lag
        max_l = int(max(omni_lags) + 1)
        o_dat = omni.load(d_arr.iloc[0] - timedelta(hours=max_l),
                          d_arr.iloc[-1] + timedelta(hours=max_l))
        print('Adding omni data with lags:{0}'.format(omni_lags))
    df_r = pd.DataFrame()
    for stn in site:
        df_psd = pd.DataFrame()
        yr = -1
        for di, dt in d_arr.items():
            # hourly array for time axis
            df_t = pd.Series(pd.date_range(dt, freq='H', periods=24))
            fn = '{0:04d}{1:02d}{2:02d}{3}_psd.txt.gz'.format(
                dt.year, dt.month, dt.day, stn.upper())
            fn = os.path.join(psd_dir, '{0:04d}'.format(dt.year),
                              '{0:02d}'.format(dt.month),
                              '{0:02d}'.format(dt.day), fn)
            # read in file, create an all-NaN day if the file is not
            # found so the time axis stays continuous
            try:
                df_in = pd.read_csv(fn, compression='gzip', header=0)
            except FileNotFoundError:
                if verbose:
                    print("File not found: {0}".format(fn))
                sp = pd.DataFrame({'t': df_t})
                sp['ipsd'] = np.nan
                sp['spsd'] = np.nan
                sp['mlt'] = np.nan
                sp['lshell'] = np.nan
                df_psd = df_psd.append(sp, ignore_index=True, sort=True)
                continue
            if verbose:
                print("File loaded: {0}".format(fn))
            # read in station coordinates; these only change yearly
            if dt.year != yr:
                if verbose:
                    print(stn, dt.year)
                stn_c = utils.load_station_coor(param=stn, year=dt.year)
            # convert frequency to mHz
            df_in['freq'] = df_in['freq'] * 1000.
            # find frequency range
            gf = df_in['freq'].between(fmin, fmax)
            gd = df_in[gf]
            # sum power across the band and convert from nT^2/Hz to
            # nT^2/mHz; min_count makes the sum NaN if any bin is NaN
            sx = gd.sum(axis=0, min_count=gd.shape[0])['H00':'H23']
            sx = sx / 1000.
            # band-normalized power (sum divided by bandwidth);
            # NOTE(review): labelled "integrated" (ipsd) but this is
            # the band average, not an integral — preserved as-is
            ix = sx / (gd['freq'].max() - gd['freq'].min())
            # fill data frame
            mlt = np.arange(0, 24)
            mlt = (mlt + 24 - float(stn_c['mlt_midnight'])) % 24.
            sp = pd.DataFrame({
                't': df_t,
                'ipsd': ix.values,
                'spsd': sx.values,
                'lshell': float(stn_c['lshell']),
                'mlt': mlt
            })
            df_psd = df_psd.append(sp, ignore_index=True, sort=True)
            yr = dt.year
        if df_psd.empty:
            continue
        df_psd['ipsd'] = pd.to_numeric(df_psd['ipsd'])
        df_psd['spsd'] = pd.to_numeric(df_psd['spsd'])
        df_psd['stn'] = stn.upper()
        df_psd = df_psd.sort_values(by=['t']).reset_index(drop=True)
        df_psd = df_psd.set_index('t')
        df_psd = df_psd.dropna(subset=['ipsd'])
        # add omni data and lagged omni if needed to each station.
        # Each lag joins a shifted *copy* of o_dat; the original code
        # mutated o_dat in place (set_index / column rename), which
        # compounded shifts and suffixes across lags and stations.
        if add_omni:
            for lag in omni_lags:
                o_lag = o_dat.copy()
                if lag != 0:
                    o_lag.index = o_lag.index + timedelta(hours=lag)
                    o_lag.columns = o_dat.columns + '_{0}'.format(lag)
                df_psd = df_psd.join(o_lag, how='left', sort=True)
        if df_r.empty:
            df_r = df_psd.copy()
        else:
            df_r = df_r.append(df_psd)
    return df_r