def expand_string(s, project, site, now, test_mode, **kwargs):
    d = kwargs.copy()
    d.update({'project': project or '',
              'site': site or '',
              'date': dt64.strftime(now, '%Y-%m-%d'),
              'datetime': dt64.strftime(now, '%Y-%m-%d %H:%M:%SUT'),
              'time': dt64.strftime(now, '%H:%M:%SUT'),
              'test_mode': ' (test mode) ' if test_mode else ''})
    return s.format(**d)
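# A minimal usage sketch for expand_string(); the template text and the
# project/site names are hypothetical. Extra keyword arguments (here
# missing_start_time) become additional format fields.
import numpy as np

mesg = expand_string('{project}/{site}: no data since '
                     '{missing_start_time}{test_mode}',
                     'AURORAWATCHNET', 'LAN1',
                     np.datetime64('now', 's'), True,
                     missing_start_time='2015-03-17 06:00:00UT')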
def uit_path(t, project, site, data_type, archive, channels):
    if uit_password is None:
        raise Exception(__name__ + '.uit_password must be set, '
                        + 'to obtain a password see '
                        + 'http://flux.phys.uit.no/div/DataAccess.html')

    # Expand the path format string with the specific UIT variables,
    # including password.
    a, d = copy.deepcopy(ap.get_archive_info(project, site, data_type,
                                             archive=archive))
    d['uit_password'] = uit_password
    fstr = path_fstr % d
    return dt64.strftime(t, fstr)
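# A minimal sketch of how uit_path() might be used; the module location
# (auroraplot.datasets.uit) and the site name are assumptions. Set the
# module-level password, then pass the function as the 'path' option so
# load_data() calls it to compute each file name.
import numpy as np
import auroraplot as ap
import auroraplot.datasets.uit as uit  # assumed location of uit_path

uit.uit_password = 'mypassword'
md = ap.load_data('UIT', 'TRO', 'MagData',
                  np.datetime64('2015-03-17T00:00'),
                  np.datetime64('2015-03-18T00:00'),
                  path=uit.uit_path)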
def make_links(link_dir, link_data):
    for link in link_data:
        link_name = os.path.join(link_dir, link['name'])

        # Make the target a relative path
        target = os.path.relpath(dt64.strftime(link['date'], link['fstr']),
                                 os.path.dirname(link_name))
        if os.path.islink(link_name) and \
                os.readlink(link_name) == target:
            # Exists and is correct
            logger.debug('link exists and is correct: ' + link_name +
                         ' -> ' + target)
            continue
        if os.path.lexists(link_name):
            logger.debug('link exists but is incorrect: ' + link_name)
            os.unlink(link_name)
        logger.debug('creating link ' + link_name + ' -> ' + target)
        os.symlink(target, link_name)
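# A minimal usage sketch for make_links(); the directory and file names are
# hypothetical. Each entry gives the symlink name, the date substituted into
# the strftime format string, and the format string for the link target.
import numpy as np

link_data = [{'name': 'rolling.png',
              'date': np.datetime64('2015-03-17', 'D'),
              'fstr': '/tmp/plots/%Y/%m/%Y%m%d.png'}]
make_links('/tmp/plots', link_data)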
    t1 = start_time
else:
    t1 = dt64.get_start_of_month(start_time)

while t1 < end_time:
    if args.single_qdc:
        t2 = end_time
    else:
        t2 = dt64.get_start_of_next_month(t1)

    if args.plot:
        mag_qdc = ap.magdata.load_qdc(project_uc, site_uc, t1)
        if mag_qdc is not None:
            lh = mag_qdc.plot(axes=ax)
            for h in lh:
                h.set_label(dt64.strftime(t1, '%Y-%m-%d'))
            ax = plt.gca()
    else:
        archive, ad = ap.get_archive_info(project_uc, site_uc, 'MagData',
                                          archive=getattr(args, 'archive'))
        mag_data = ap.load_data(project_uc, site_uc, 'MagData', t1, t2,
                                archive=archive,
                                raise_all=args.raise_all)
        if mag_data is not None:
            mag_qdc = mag_data.make_qdc(smooth=args.smooth)
            qdc_archive, qdc_ad \
                = ap.get_archive_info(project_uc, site_uc, 'MagQDC')
def load_data(project, site, data_type, start_time, end_time,
              archive=None, channels=None, path=None, load_function=None,
              raise_all=False, cadence=None, aggregate=None,
              filter_function=None, use_cache=None, now=None):
    '''Load data.

    project: name of the project (upper case)

    site: site abbreviation (upper case)

    data_type: class name of the data type to be loaded

    start_time: start time (inclusive) of the data set

    end_time: end time (exclusive) of the data set

    The following optional parameters are recognised:

    archive: name of the archive. Required if more than one archive is
        present and there is not an archive called "default".

    channels: data channel(s) to load. All are loaded if not specified.

    path: URL or file path, specified as a strftime format specifier.
        Alternatively can be a function reference which is passed the
        time and returns the filename. If given this overrides the
        standard load path.

    load_function: Pass responsibility for loading the data to the given
        function reference, after validating the input parameters.
    '''
    archive, ad = get_archive_info(project, site, data_type, archive=archive)
    cad_units = dt64.get_units(ad['nominal_cadence'])
    start_time = start_time.astype('datetime64[%s]' % cad_units)
    end_time = end_time.astype('datetime64[%s]' % cad_units)

    if channels is None:
        channels = ad['channels']
    else:
        # Could be a single channel name or a list of channels
        if isinstance(channels, six.string_types):
            if channels not in ad['channels']:
                raise Exception('Unknown channel')
        else:
            for c in channels:
                if c not in ad['channels']:
                    raise Exception('Unknown channel')

    if path is None:
        path = ad['path']

    if load_function is None:
        load_function = ad.get('load_function')
    if filter_function is None:
        filter_function = ad.get('filter_function')

    if load_function:
        # Pass responsibility for loading to some other
        # function. Parameters have already been checked.
        return load_function(project, site, data_type, start_time, end_time,
                             archive=archive, channels=channels, path=path,
                             raise_all=raise_all, cadence=cadence,
                             aggregate=aggregate,
                             filter_function=filter_function)

    data = []
    for t in dt64.dt64_range(dt64.floor(start_time, ad['duration']),
                             end_time, ad['duration']):
        # A local copy of the file to be loaded, possibly an
        # uncompressed version.
        temp_file_name = None
        t2 = t + ad['duration']
        if hasattr(path, '__call__'):
            # Function: call it with relevant information to get the path
            file_name = path(t, project=project, site=site,
                             data_type=data_type, archive=archive,
                             channels=channels)
        else:
            file_name = dt64.strftime(t, path)

        url_parts = urlparse(file_name)
        if url_parts.scheme in ('ftp', 'http', 'https'):
            if ad.get('cache_dir'):
                if now is None:
                    now = np.datetime64('now', 's')
                dtd = ad.get('data_transfer_delay', np.timedelta64(0, 's'))
                if use_cache is None:
                    if t2 + dtd < now:
                        uc = True  # OK to try fetching from the cache
                    else:
                        uc = False
                        logger.debug('data too new to cache')
                else:
                    uc = use_cache
                cache_filename = os.path.normpath(
                    os.path.join(ad['cache_dir'],
                                 file_name.replace(':', '/')))
                logger.debug('cache file: ' + cache_filename)
                if uc:
                    if os.path.exists(cache_filename):
                        file_name = cache_filename
                        logger.debug('cache hit')
                    else:
                        file_name = download_url(file_name,
                                                 dest=cache_filename)
                else:
                    # Download but discard after use
                    file_name = download_url(file_name)
                    temp_file_name = file_name
            else:
                # No cache so discard after use
                file_name = download_url(file_name)
                temp_file_name = file_name
            if file_name is None:
                continue
        elif url_parts.scheme == 'file':
            file_name = url_parts.path

        if not os.path.exists(file_name):
            logger.info('missing file %s', file_name)
            continue

        # Now only need to access local files
        if os.path.splitext(url_parts.path)[1] in ('.gz', '.dgz'):
            # Transparently uncompress
            gunzipped_file = None
            try:
                logger.debug('unzipping %s', file_name)
                gunzipped_file = NamedTemporaryFile(prefix=__name__,
                                                    delete=False)
                with gzip.open(file_name, 'rb') as gzip_file:
                    shutil.copyfileobj(gzip_file, gunzipped_file)
                gunzipped_file.close()
            except KeyboardInterrupt:
                raise
            except Exception as e:
                if gunzipped_file:
                    gunzipped_file.close()
                    os.unlink(gunzipped_file.name)
                    gunzipped_file = None
                continue
            finally:
                if temp_file_name:
                    logger.debug('deleting temporary file ' + temp_file_name)
                    os.unlink(temp_file_name)
            temp_file_name = gunzipped_file.name
            file_name = temp_file_name

        logger.info('loading ' + file_name)
        try:
            tmp = ad['load_converter'](file_name, ad,
                                       project=project,
                                       site=site,
                                       data_type=data_type,
                                       start_time=t,
                                       end_time=t2,
                                       channels=channels,
                                       archive=archive,
                                       path=path,
                                       raise_all=raise_all)
            if tmp is not None:
                if cadence is not None and cadence <= ad['duration']:
                    tmp.set_cadence(cadence, aggregate=aggregate,
                                    inplace=True)
                data.append(tmp)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            if raise_all:
                raise
            logger.info('Could not load ' + file_name)
            logger.debug(str(e))
            logger.debug(traceback.format_exc())
        finally:
            if temp_file_name:
                logger.debug('deleting temporary file ' + temp_file_name)
                os.unlink(temp_file_name)

    if len(data) == 0:
        return None

    r = concatenate(data, sort=False)
    r.extract(inplace=True,
              start_time=start_time,
              end_time=end_time,
              channels=channels)
    if cadence is not None and cadence > ad['duration']:
        # cadence too large to apply on results of loading each file,
        # apply to combined object
        r.set_cadence(cadence, aggregate=aggregate, inplace=True)
    if filter_function:
        logger.debug('filtering with function %s', filter_function.__name__)
        r = filter_function(r)
    return r
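# A minimal usage sketch for load_data(); the project/site names are
# hypothetical and assume the corresponding dataset module has already been
# imported so that the project is registered.
import numpy as np
import auroraplot as ap

st = np.datetime64('2015-03-17T00:00:00')
et = st + np.timedelta64(1, 'D')
md = ap.load_data('AURORAWATCHNET', 'LAN1', 'MagData', st, et)
if md is None:
    print('no data found')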
def warn_missing_data(data, project, site, now, status_dir, test_mode,
                      config):
    section_name = 'missing_data'
    missing_interval = np.timedelta64(1, 'h')
    timeout = np.timedelta64(1, 'D')
    if not missing_interval:
        return

    if data is None:
        t = None
    else:
        # Find last non-nan value
        idx = np.nonzero(np.any(np.logical_not(np.isnan(data.data)),
                                axis=0))[0]
        if len(idx):
            t = data.sample_start_time[idx[-1]]
        else:
            t = None

    if t is None:
        # Data is entirely missing. Was expecting 24 hours of data,
        # with a nominal end time of the end of the current hour.
        t = dt64.ceil(now, np.timedelta64(1, 'h')) - np.timedelta64(1, 'D')

    tstr = dt64.strftime(t, '%Y-%m-%d %H:%M:%SUT')
    if t < now - missing_interval:
        # Data is missing
        logger.info(project + '/' + site + ' missing data')
        if config.has_option(section_name, 'twitter_username'):
            username = config.get(section_name, 'twitter_username')
            mesg = expand_string(config.get(section_name, 'twitter_message'),
                                 project, site, now, test_mode,
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval))
            run_if_timeout_reached(send_tweet, timeout, now, status_dir,
                                   func_args=[username, mesg],
                                   name=section_name + '_tweet')

        if config.has_option(section_name, 'facebook_cmd'):
            fbcmd_opts = config.get(section_name, 'facebook_cmd').split()
            mesg = expand_string(config.get(section_name, 'facebook_message'),
                                 project, site, now, test_mode,
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval))
            run_if_timeout_reached(fbcmd, timeout, now, status_dir,
                                   func_args=[fbcmd_opts, mesg],
                                   name=section_name + '_facebook')

        # Email. Leave it to the send_email() function to determine if it
        # is configured since there are many possible settings in the
        # config file. Run each email job separately in case of
        # failure to send.
        func_kwargs = {'missing_start_time': tstr,
                       'missing_interval': str(missing_interval)}
        for ejob in get_email_jobs(config, section_name):
            run_if_timeout_reached(send_email, timeout, now, status_dir,
                                   func_args=[config, section_name, ejob,
                                              project, site, now, test_mode],
                                   func_kwargs=func_kwargs,
                                   name=section_name + '_' + ejob)
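# A minimal sketch of the configuration section warn_missing_data() reads;
# the option values are hypothetical, but the section and option names are
# those queried above. A real deployment would load these from the config
# file with configparser.
try:
    import configparser                  # Python 3
except ImportError:
    import ConfigParser as configparser  # Python 2

config = configparser.RawConfigParser()
config.add_section('missing_data')
config.set('missing_data', 'twitter_username', 'example_account')
config.set('missing_data', 'twitter_message',
           'No data from {project}/{site} since '
           '{missing_start_time}{test_mode}')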
    def make_qdc(self, nquiet=5, channels=None,
                 cadence=np.timedelta64(5, 's').astype('m8[us]'),
                 quiet_days_method=None, smooth=True, plot=False,
                 remove_nans_window=np.timedelta64(10, 'm'),
                 remove_nans_func=np.nanmean):
        qd = self.get_quiet_days(nquiet=nquiet, channels=channels,
                                 cadence=cadence, method=quiet_days_method)
        axes = None
        if plot:
            for q in qd:
                qst = q.start_time
                # To overlay quiet days use the interval from the start of
                # each quiet day otherwise the lines are spread over time
                # and do not overlay.
                q = copy.deepcopy(q)
                q.start_time = q.start_time - qst
                q.end_time = q.end_time - qst
                q.sample_start_time = q.sample_start_time - qst
                q.sample_end_time = q.sample_end_time - qst
                q.plot(title='Quiet days', axes=axes,
                       label=dt64.strftime(qst, '%Y-%m-%d'))
                axes = plt.gcf().get_axes()
            for ax in axes:
                ax.legend(loc='upper left', fontsize='small')

        sam_st = np.arange(np.timedelta64(0, 's').astype('m8[us]'),
                           np.timedelta64(24, 'h').astype('m8[us]'),
                           cadence)
        sam_et = sam_st + cadence

        qdc_data = np.zeros([len(qd[0].channels), len(sam_st)])
        count = np.zeros_like(qdc_data)
        for n in range(nquiet):
            not_nan = np.logical_not(np.isnan(qd[n].data))
            qdc_data[not_nan] += qd[n].data[not_nan]
            count[not_nan] += 1

        qdc_data /= count

        qdc = MagQDC(project=self.project,
                     site=self.site,
                     channels=qd[0].channels,
                     start_time=np.timedelta64(0, 'h'),
                     end_time=np.timedelta64(24, 'h'),
                     sample_start_time=sam_st,
                     sample_end_time=sam_et,
                     integration_interval=None,
                     nominal_cadence=cadence,
                     data=qdc_data,
                     units=self.units,
                     sort=False)

        if remove_nans_window and remove_nans_func and \
                np.any(np.isnan(qdc.data)):
            qdc_no_nans = qdc.sliding_window(remove_nans_func,
                                             remove_nans_window)
            qdc = ap.data.first_non_nan([qdc, qdc_no_nans])

        final_fig = None
        if smooth:
            if plot:
                qdc.plot(title='Final QDC', label='Unsmoothed QDC')
                final_fig = plt.gcf()
            qdc.smooth(inplace=True)

        if plot:
            qdc.plot(title='Final QDC', figure=final_fig, label='Final QDC')
            for ax in plt.gcf().get_axes():
                ax.legend(loc='upper left', fontsize='small')

        return qdc
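# A minimal sketch of computing a QDC from one month of data; the
# project/site names are hypothetical and assume the dataset module is
# already imported so that the project is registered.
import numpy as np
import auroraplot as ap
import auroraplot.dt64tools as dt64

t1 = np.datetime64('2015-03-01T00:00:00')
t2 = dt64.get_start_of_next_month(t1)
mag_data = ap.load_data('AURORAWATCHNET', 'LAN1', 'MagData', t1, t2)
if mag_data is not None:
    mag_qdc = mag_data.make_qdc(smooth=True)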
        k_filename = os.path.join(site_summary_dir, 'rolling_k.png')
        temp_plot_filename = os.path.join(site_summary_dir,
                                          'rolling_temp.png')
        volt_plot_filename = os.path.join(site_summary_dir,
                                          'rolling_volt.png')
        stackplot_filename = os.path.join(summary_dir, 'stackplots',
                                          'rolling.png')
        activity_plot_filename = os.path.join(summary_dir, 'activity_plots',
                                              'rolling.png')
    else:
        mag_plot_filename = \
            dt64.strftime(t1, os.path.join(site_summary_dir, '%Y', '%m',
                                           site_lc + '_%Y%m%d.png'))
        qdc_fit_filename = \
            dt64.strftime(t1, os.path.join(site_summary_dir, '%Y', '%m',
                                           site_lc + '_%Y%m%d_fit.png'))
        k_filename = \
            dt64.strftime(t1, os.path.join(site_summary_dir, '%Y', '%m',
                                           site_lc + '_k_%Y%m%d.png'))
        stackplot_filename = \
            dt64.strftime(t1, os.path.join(summary_dir, 'stackplots',
                                           '%Y', '%m', '%Y%m%d.png'))
def make_aurorawatch_plot(project, site, st, et, rolling, exif_tags):
    '''
    Load data and make the AuroraWatch activity plot. Plots always
    cover 24 hours, but may begin at midnight for day plots, or at any
    other hour for rolling plots. This function uses the previous 72
    hours to help fit the quiet-day curve.

    project: name of project

    site: name of site

    st: start time. For day plots this is the start of the day. For
        rolling plots this is the start of the rolling 24 hour period.

    et: end time. For day plots this is the start of the following
        day. For rolling plots it is the end of the 24 hour period.

    rolling: flag to indicate if a rolling plot should also be made. It
        is not otherwise possible to identify rolling plots which start
        at midnight.
    '''

    # global mag_fstr
    global args

    # Export to global names for debugging
    global mag_data
    global mag_qdc
    global activity

    day = np.timedelta64(24, 'h')

    archive, archive_details = ap.get_archive_info(project, site, 'MagData')

    # Load the data to plot. For rolling plots load up to midnight so
    # that both the rolling plot and the current day plot can be
    # generated efficiently.
    mag_data = my_load_data(project, site, 'MagData', st, dt64.ceil(et, day))

    if mag_data is None or \
            not np.any(np.logical_not(np.isnan(mag_data.data))):
        # "not np.any(...)" eliminates an empty array or an array of
        # just NaNs.
        logger.info('No magnetic field data')
        return

    # Load up some data from previous days and apply a least-squares
    # fit to remove baseline drifts. Data from the current day is not
    # used. This ensures that results do not change over the current
    # day when new data becomes available.
    qdc_fit_interval = args.qdc_fit_interval * day
    fit_et = dt64.ceil(st, day)  # Could be doing a rolling plot
    fit_st = fit_et - qdc_fit_interval
    fit_data = my_load_data(project, site, 'MagData', fit_st, fit_et)

    # Load a QDC.
    mag_qdc = ap.magdata.load_qdc(project, site, st, tries=6, realtime=True)

    if mag_qdc is None:
        logger.info('No QDC')
    elif fit_data is None:
        # Cannot fit, so assume no errors in QDC
        errors = [0.0]
    else:
        try:
            # Fit the QDC to the previous data
            qdc_aligned, errors, fi = mag_qdc.align(
                fit_data,
                fit=ap.data.Data.minimise_sign_error_fit,
                plot_fit=args.plot_fit,
                full_output=True)
        except Exception as e:
            logger.warn('Could not fit QDC')
            logger.info(str(e))
            errors = [0.0]
        else:
            # Fitted ok, plot if necessary
            if args.plot_fit:
                fig = plt.gcf()
                fig.set_figwidth(6.4)
                fig.set_figheight(4.8)
                fig.subplots_adjust(bottom=0.1, top=0.85,
                                    left=0.15, right=0.925)
                fit_fstr = mag_fstr[:(mag_fstr.rindex('.'))] + '_fit.png'
                mysavefig(fig, dt64.strftime(dt64.ceil(st, day), fit_fstr),
                          exif_tags)

    # Adjust the quiet day curve with the error obtained by fitting to
    # previous days.
    if mag_qdc is None:
        mag_qdc_adj = None
    else:
        mag_qdc_adj = copy.deepcopy(mag_qdc)
        mag_qdc_adj.data -= errors[0]

    # Ensure data gaps are marked as such in the plots. Straight lines
    # across large gaps look bad!
    mag_data = mag_data.mark_missing_data(cadence=2*mag_data.nominal_cadence)

    # Do day plot. Trim start time for occasions when making a day
    # plot simultaneously with a rolling plot.
    st2 = dt64.ceil(st, day)
    md_day = mag_data.extract(start_time=st2)
    act_ki = activity_plot(md_day, mag_qdc_adj,
                           dt64.strftime(st2, mag_fstr), exif_tags,
                           k_index_filename=dt64.strftime(st2, k_fstr))
    r = [md_day]
    r.extend(act_ki)
    if rolling:
        # Trim end time
        md_rolling = mag_data.extract(end_time=et)
        act_ki_rolling = activity_plot(md_rolling, mag_qdc_adj,
                                       rolling_magdata_filename,
                                       exif_tags,
                                       k_index_filename=rolling_k_filename)
        r.append(md_rolling)
        r.extend(act_ki_rolling)
    return r
    if len(n_s) == 1:
        # Only project given, use all sites
        for k in ap.projects[n_s[0]].keys():
            project_site[n_s[0] + '/' + k] = (n_s[0], k)
    elif len(n_s) == 2:
        # Project and site given
        project_site[s] = tuple(n_s)
    else:
        raise Exception('bad value for project/site (' + s + ')')

t1 = start_time
while t1 < end_time:
    logger.debug('time: %s', dt64.strftime(t1, '%Y-%m-%d'))
    plt.close('all')
    t2 = t1 + day
    t1_eod = dt64.ceil(t1, day)  # t1 end of day
    t2_eod = dt64.ceil(t2, day)  # t2 end of day

    # List of magdata objects for this day
    mdl_day = []
    act_day = []
    mdl_rolling = []
    act_rolling = []

    # Get copyright and attribution data for all sites. License had
    # better be CC4-BY-NC-SA for all since we are combining them.
    copyright_list = []
    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))
    for t in dt64.dt64_range(site_st, site_et, src_ai['duration']):
        temp_file_name = None
        try:
            if hasattr(dest_path, '__call__'):
                # Function: call it with relevant information to get
                # the dest_path
                dest_file_name = dest_path(t,
                                           project=project,
                                           site=site,
                                           data_type=data_type,
                                           archive=dest_an,
                                           channels=channels)
            else:
                dest_file_name = dt64.strftime(t, dest_path)

            url_parts = urlparse(dest_file_name)
            if url_parts.scheme in ('ftp', 'http', 'https'):
                raise Exception('Cannot store to a remote location')
            elif url_parts.scheme == 'file':
                dest_file_name = url_parts.path

            if os.path.exists(dest_file_name) and not args.overwrite:
                logger.info('%s already exists', dest_file_name)
                continue

            if hasattr(src_path, '__call__'):
                # Function: call it with relevant information to get
                # the src_path
                file_name = src_path(t,
        temp_data = my_load_data(project_uc, site_uc, 'TemperatureData',
                                 t1, t2_eod)
        if temp_data is not None:
            temp_data.set_cadence(np.timedelta64(10, 'm'), inplace=True)
            if args.rolling:
                # Rolling plot
                make_temperature_plot(temp_data.extract(end_time=t2),
                                      rolling_tempdata_filename,
                                      exif_tags)
            # Make day plot. Trim data from start because when
            # --rolling option is given it can include data from
            # the previous day.
            make_temperature_plot(temp_data.extract(start_time=t1_eod),
                                  dt64.strftime(t1_eod, temp_fstr),
                                  exif_tags)

    voltage_data = None
    if has_data_of_type(project_uc, site_uc, 'VoltageData'):
        voltage_data = my_load_data(project_uc, site_uc, 'VoltageData',
                                    t1, t2_eod)
        if voltage_data is not None:
            voltage_data.set_cadence(np.timedelta64(10, 'm'), inplace=True)
            if args.rolling:
                # Rolling plot
                make_voltage_plot(voltage_data.extract(end_time=t2),
                                  rolling_voltdata_filename,
                                  exif_tags)
    site_st = dt64.floor(site_st, day)
    site_et = ap.get_site_info(project, site, 'end_time')
    if site_et is None or site_et > et:
        site_et = et
    else:
        site_et = dt64.ceil(site_et, day)

    t1 = dt64.get_start_of_month(site_st)
    while t1 < site_et:
        t2 = dt64.get_start_of_next_month(t1)
        try:
            if args.only_missing:
                mag_qdc = ap.magdata.load_qdc(project, site, t1)
                if mag_qdc is not None and mag_qdc.data.size != 0 and \
                        not np.any(np.isnan(mag_qdc.data)):
                    logger.info('QDC for %s/%s %s already exists',
                                project, site,
                                dt64.strftime(t1, '%Y-%m-%d'))
                    continue
            kwargs = {}
            if cadence:
                kwargs['cadence'] = cadence
                kwargs['aggregate'] = agg_func
            mag_data = ap.load_data(project, site, 'MagData', t1, t2,
                                    archive=archive,
                                    raise_all=args.raise_all,
                                    **kwargs)
            if mag_data is not None:
                if post_cadence:
                    mag_data.set_cadence(post_cadence,
    else:
        site_et = dt64.ceil(site_et, day)

    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))

    last_data = None
    for t1 in dt64.dt64_range(site_st, site_et, day):
        try:
            t2 = t1 + day
            if args.missing_only:
                data = ap.load_data(project, site, 'MagData', t1, t2,
                                    archive=bl_archive)
                if data is not None and np.size(data.data) and \
                        np.all(np.isfinite(data.data)):
                    logger.info('baseline data for %s/%s %s already exists',
                                project, site,
                                dt64.strftime(t1, '%Y-%m-%d'))
                    continue

            # Calculate dates for data to be used for fitting
            md_mean_time = dt64.mean(t1, t2) + qdc_fit_offset
            md_st = md_mean_time - qdc_fit_duration / 2
            md_et = md_st + qdc_fit_duration

            if last_data is None or last_data.end_time != md_et - day:
                # Load entire data block
                md = ap.load_data(project, site, 'MagData', md_st, md_et,
                                  archive=md_archive)
            else:
                # Load the last day of data and concatenate
                md = ap.load_data(project, site, 'MagData', md_et - day,
                                  md_et, archive=md_archive)
                if md is None:
                    md = last_data
#!/usr/bin/env python

import numpy as np
import matplotlib as mpl
import auroraplot.dt64tools as dt64
from matplotlib import pyplot as plt

plt.close('all')

# Define a line from year 1200 to year 2080
x = np.array(dt64.from_YMD([1200, 2080], 1, 1))
y = np.array([0, 1])
x = x.astype('M8[ms]')

# Plot
dt64.plot_dt64(x, y)
plt.show()

print(dt64.strftime(x[0], 'Start time: %Y-%m-%d %H:%M:%S'))
print(dt64.strftime(x[-1], 'End time: %Y-%m-%d %H:%M:%S'))
print('Now try zoom and watch the time units adjust automatically')
def aurora_alert(activity, combined, now, status_dir, test_mode,
                 ignore_timeout, config):
    assert activity.thresholds.size == 4, \
        'Incorrect number of activity thresholds'
    assert activity.sample_start_time[-1] <= now \
        and activity.sample_end_time[-1] >= now, \
        'Last activity sample for wrong time'
    assert np.all(np.logical_or(activity.data >= 0,
                                np.isnan(activity.data))), \
        'Activity data must be >= 0'

    if np.isnan(activity.data[0, -1]):
        return

    n = np.where(activity.data[0, -1] >= activity.thresholds)[0][-1]
    logger.debug('Activity level for ' + activity.project + '/' +
                 activity.site + ': ' + str(n))

    section_name = 'aurora_alert_' + str(n)
    if n == 0:
        # No significant activity
        return
    elif not config.has_section(section_name):
        logger.debug('No [' + section_name + '] section found')
        return

    nowstr = dt64.strftime(now, '%Y-%m-%d %H:%M:%SUT')
    tweet_timeout = facebook_timeout = email_timeout = \
        np.timedelta64(12, 'h')

    # Compute filename to use for timeout, and the names of any other
    # files which must be updated.
    job_base_name = section_name
    if not combined:
        job_base_name += '_' + activity.project.lower() + '_' \
            + activity.site.lower()

    tweet_files = []
    facebook_files = []
    email_files = []
    for i in range(1, n + 1):
        tweet_files.append(job_base_name + '_tweet')
        facebook_files.append(job_base_name + '_facebook')
        email_files.append(job_base_name + '_')  # Must append the ejob later

    # Tweet
    if config.has_option(section_name, 'twitter_username'):
        twitter_username = config.get(section_name, 'twitter_username')
        twitter_mesg = expand_string(config.get(section_name,
                                                'twitter_message'),
                                     activity.project, activity.site,
                                     now, test_mode)
        run_if_timeout_reached(send_tweet, tweet_timeout, now, status_dir,
                               func_args=[twitter_username, twitter_mesg],
                               name=tweet_files[-1],
                               also_update=tweet_files[:-1])
    else:
        logger.debug('Sending tweet not configured')

    # Post to facebook
    if config.has_option(section_name, 'facebook_cmd'):
        facebook_mesg = expand_string(config.get(section_name,
                                                 'facebook_message'),
                                      activity.project, activity.site,
                                      now, test_mode)
        fbcmd_opts = config.get(section_name, 'facebook_cmd').split()
        run_if_timeout_reached(fbcmd, facebook_timeout, now, status_dir,
                               func_args=[fbcmd_opts, facebook_mesg],
                               name=facebook_files[-1],
                               also_update=facebook_files[:-1])
    else:
        logger.debug('Facebook posting not configured')

    # Email. Leave to the send_email() function to determine if it is
    # configured since there are many possible settings in the config
    # file.
    for ejob in get_email_jobs(config, section_name):
        run_if_timeout_reached(send_email, email_timeout, now, status_dir,
                               func_args=[config, section_name, ejob,
                                          activity.project, activity.site,
                                          now, test_mode],
                               name=email_files[-1] + ejob,
                               also_update=map(lambda x: x + ejob,
                                               email_files[:-1]))
def load_qdc(project, site, time, archive=None, channels=None, path=None,
             tries=1, realtime=False, load_function=None, full_output=False):
    '''Load quiet-day curve.

    project: name of the project (upper case)

    site: site abbreviation (upper case)

    time: a time within the quiet-day curve period

    The following optional parameters are recognised:

    archive: name of the archive. Required if more than one archive is
        present and there is not an archive called "default".

    channels: data channel(s) to load. All are loaded if not specified.

    tries: the number of attempts to load a quiet-day curve. If >1 and
        the first attempt is not successful then an attempt will be made
        to load the previous QDC.

    path: URL or file path, specified as a strftime format specifier.
        Alternatively can be a function reference which is passed the
        time and returns the filename. If given this overrides the
        standard load path.

    load_function: Pass responsibility for loading the data to the given
        function reference, after validating the input parameters.
    '''

    data_type = 'MagQDC'
    archive, ad = ap.get_archive_info(project, site, data_type,
                                      archive=archive)
    if channels is not None:
        # Ensure it is a 1D numpy array
        channels = np.array(channels).flatten()
        for c in channels:
            if c not in ad['channels']:
                raise ValueError('Unknown channel (%s)' % str(c))
    else:
        channels = ad['channels']

    if path is None:
        path = ad['path']

    if load_function is None:
        load_function = ad.get('load_function', None)

    if tries is None:
        tries = 1

    if load_function:
        # Pass responsibility for loading to some other
        # function. Parameters have already been checked.
        return load_function(project, site, data_type, time,
                             archive=archive, channels=channels,
                             path=path, tries=tries, realtime=realtime,
                             full_output=full_output)

    data = []

    t = dt64.get_start_of_month(time)

    if realtime:
        # For realtime use the current month's QDC is (was) not yet
        # available, so use the previous month's QDC.
        t = dt64.get_start_of_previous_month(t)

        # Early in the month the previous month's QDC was probably not
        # computed, so use the month before that.
        qdc_rollover_day = ad.get('qdc_rollover_day', 4)
        if dt64.get_day_of_month(time) < qdc_rollover_day:
            t = dt64.get_start_of_previous_month(t)

    for n in range(tries):
        try:
            if hasattr(path, '__call__'):
                # Function: call it with relevant information to get
                # the path.
                file_name = path(t, project=project, site=site,
                                 data_type=data_type, archive=archive,
                                 channels=channels)
            else:
                file_name = dt64.strftime(t, path)

            logger.info('loading ' + file_name)

            r = ad['load_converter'](file_name,
                                     ad,
                                     project=project,
                                     site=site,
                                     data_type=data_type,
                                     start_time=np.timedelta64(0, 'h'),
                                     end_time=np.timedelta64(24, 'h'),
                                     archive=archive,
                                     channels=channels,
                                     path=path)
            if r is not None:
                r.extract(inplace=True, channels=channels)
                if full_output:
                    r2 = {'magqdc': r,
                          'tries': n + 1,
                          'maxtries': tries}
                    return r2
                else:
                    return r
        finally:
            # Go to start of previous month
            t = dt64.get_start_of_month(t - np.timedelta64(24, 'h'))

    return None
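# A minimal usage sketch for load_qdc(); the project/site names are
# hypothetical and assume the dataset module is already imported so that
# the project is registered. tries=3 permits falling back to QDCs from up
# to two months earlier.
import numpy as np
import auroraplot as ap
import auroraplot.magdata

qdc = ap.magdata.load_qdc('AURORAWATCHNET', 'LAN1',
                          np.datetime64('2015-03-17T00:00'), tries=3)
if qdc is None:
    print('no QDC available')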