def emit_result_stats(self, result):
    """Emit stats about how well the result satisfied the query."""
    if not self.collect_metrics():
        return

    allow_fallback = bool(self.api_key and self.api_key.can_fallback())
    if result is None:
        data_accuracy = DataAccuracy.none
        source = None
    else:
        data_accuracy = result.data_accuracy
        source = result.source

    status = "miss"
    if data_accuracy <= self.expected_accuracy:
        # equal or better (smaller) accuracy
        status = "hit"

    tags = [
        "fallback_allowed:%s" % str(allow_fallback).lower(),
        "accuracy:%s" % self.expected_accuracy.name,
        "status:%s" % status,
    ]
    if status == "hit" and source:
        tags.append("source:%s" % source.name)
    self._emit_stat("result", tags)

    bind_threadlocal(
        fallback_allowed=allow_fallback,
        accuracy=data_accuracy.name,
        accuracy_min=self.expected_accuracy.name,
        result_status=status,
    )
def prepare_response(self, result, api_key):
    response = {
        "location": {"lat": result["lat"], "lng": result["lon"]},
        "accuracy": result["accuracy"],
    }
    if result["fallback"]:
        response["fallback"] = result["fallback"]

    # Create a signature of the response, and look for unique responses.
    response_content = json.dumps(response, sort_keys=True)
    response_sig = generate_signature(
        "response-sig",
        response_content,
        self.request.client_addr,
        self.request.url,  # Includes the API and API key
    )
    today = utcnow().date().isoformat()
    key = f"response-sig:{self.view_type}:{api_key.valid_key}:{today}"
    with self.redis_client.pipeline() as pipe:
        pipe.pfadd(key, response_sig)
        pipe.expire(key, 90000)  # 25 hours
        new_response, _ = pipe.execute()
    bind_threadlocal(
        api_repeat_response=not new_response,
        api_response_sig=response_sig[:16],
    )
    return response
def emit_source_stats(self, source, results):
    """Emit stats about how well the source satisfied the query."""
    if not self.collect_metrics():
        return

    # If any one of the results was good enough, consider it a hit.
    status = "miss"
    result = None
    for result in results:
        if result.data_accuracy <= self.expected_accuracy:
            # equal or better (smaller) accuracy
            status = "hit"
            break

    tags = [
        "source:%s" % source.name,
        "accuracy:%s" % self.expected_accuracy.name,
        "status:%s" % status,
    ]
    self._emit_stat("source", tags)

    bind_prefix = f"source_{source.name}"
    bind_stats = {
        bind_prefix + "_accuracy": result.data_accuracy.name if result else None,
        bind_prefix + "_accuracy_min": self.expected_accuracy.name,
        bind_prefix + "_status": status,
    }
    bind_threadlocal(**bind_stats)
def log_count(self, valid_key):
    METRICS.incr(
        self.view_type + ".request",
        tags=["path:" + self.metric_path, "key:" + valid_key],
    )
    bind_threadlocal(
        api_key=valid_key,
        api_path=self.metric_path,
        api_type=self.view_type,
    )
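# The snippets above only bind key/value pairs; the bound context shows up
# on log lines once merge_threadlocal is installed as a structlog processor.
# A minimal sketch of that wiring, assuming a structlog version that still
# ships structlog.threadlocal (newer releases deprecate it in favor of
# contextvars); the logger name, key values, and renderer choice here are
# illustrative, not taken from the code above.
import structlog
from structlog.threadlocal import bind_threadlocal, clear_threadlocal, merge_threadlocal

# merge_threadlocal is a processor that folds the thread-local context
# into every event dict before it is rendered.
structlog.configure(
    processors=[
        merge_threadlocal,
        structlog.processors.KeyValueRenderer(sort_keys=True),
    ]
)

log = structlog.get_logger()
clear_threadlocal()
bind_threadlocal(api_key="demo", api_path="v1.geolocate")
log.info("request.start")
# -> api_key='demo' api_path='v1.geolocate' event='request.start'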
def test_bind_and_merge(self):
    """
    Binding a variable causes it to be included in the result of
    merge_threadlocal.
    """
    bind_threadlocal(a=1)

    assert {"a": 1, "b": 2} == merge_threadlocal(None, None, {"b": 2})

def test_clear(self):
    """
    The thread-local context can be cleared, causing any previously bound
    variables to not be included in merge_threadlocal's result.
    """
    bind_threadlocal(a=1)
    clear_threadlocal()

    assert {"b": 2} == merge_threadlocal(None, None, {"b": 2})

def test_multiple_binds(self):
    """
    Multiple calls to bind_threadlocal accumulate values instead of
    replacing them.
    """
    bind_threadlocal(a=1, b=2)
    bind_threadlocal(c=3)

    assert {"a": 1, "b": 2, "c": 3} == merge_threadlocal(None, None, {"b": 2})
def parse_apikey(self):
    try:
        api_key_text = self.request.GET.get("key", None)
    except Exception:
        api_key_text = None
    if api_key_text:
        # Validate the key; this may return None.
        valid_key = validated_key(api_key_text)
        if valid_key is None:
            bind_threadlocal(invalid_api_key=api_key_text)
        return valid_key
    return None
def bind(key_val: Dict[str, str], clear_thread_local: bool = False):
    """
    Bind key/value pairs to the thread-local logging context.

    :param key_val: Key/value pairs to bind to the context.
    :type key_val: Dict[str, str]
    :param clear_thread_local: If True, clear the existing thread-local
        context before binding.
    :type clear_thread_local: bool
    :return: None
    :rtype: None
    """
    if clear_thread_local:
        LogUtil.clear_threadlocal()
    bind_threadlocal(**key_val)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

def cross_val(X, y, model_args, cross_val_args):
    cv = KFold(**cross_val_args)
    clf = RandomForestClassifier(**model_args)

    for i, (train_index, test_index) in enumerate(cv.split(X, y)):
        # Tag every log line from this iteration with the fold number.
        bind_threadlocal(cv_split=i)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train, y_train)
        in_acc = clf.score(X_train, y_train)
        out_acc = clf.score(X_test, y_test)
        log.info('accuracy', set='test', value=out_acc)
        log.info('accuracy', set='train', value=in_acc)
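# A hypothetical invocation of cross_val, to show what the two dict
# arguments carry: model_args feed RandomForestClassifier, cross_val_args
# feed KFold. All parameter values are illustrative, and a module-level
# structlog logger `log` (as in the snippet above) is assumed.
import numpy as np

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

cross_val(
    X, y,
    model_args={"n_estimators": 50, "random_state": 0},
    cross_val_args={"n_splits": 5, "shuffle": True, "random_state": 0},
)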
def test_unbind_threadlocal(self):
    """
    Test that unbinding from the thread-local context works for keys that
    exist and does not raise an error when they do not exist.
    """
    clear_threadlocal()
    bind_threadlocal(a=234, b=34)

    assert {"a": 234, "b": 34} == merge_threadlocal_context(None, None, {})

    unbind_threadlocal("a")

    assert {"b": 34} == merge_threadlocal_context(None, None, {})

    unbind_threadlocal("non-existing-key")

    assert {"b": 34} == merge_threadlocal_context(None, None, {})
def _init_threadlocals(filename, settings, threadlocals):
    threadlocals.settings = settings
    local_types = find_local_types(filename)
    threadlocals.signatures = local_types.signatures
    threadlocals.import_strategist = ImportStrategist(local_types)
    threadlocals.strategy_to_names = {}

    # per-file counters
    threadlocals.docstring_count = 0
    threadlocals.typed_docstring_count = 0
    threadlocals.comment_count = 0
    threadlocals.warning_count = 0
    threadlocals.error_count = 0

    # for the structlog logger (it manages its own threadlocals):
    clear_threadlocal()
    bind_threadlocal(filename=filename)
def log_ip_and_rate_limited(self, valid_key, maxreq):
    # Log IP
    addr = self.request.client_addr
    if not addr:
        # Use localhost as a marker
        addr = "127.0.0.1"
    if isinstance(addr, bytes):
        addr = addr.decode("ascii")
    try:
        ip = str(ip_address(addr))
    except ValueError:
        ip = "127.0.0.1"

    now = util.utcnow()
    log_ip_key = "apiuser:{api_type}:{key}:{date}".format(
        api_type=self.view_type,
        key=valid_key,
        date=now.date().strftime("%Y-%m-%d"),
    )
    rate_key = "apilimit:{key}:{path}:{time}".format(
        key=valid_key,
        path=self.metric_path,
        time=now.strftime("%Y%m%d"),
    )

    should_limit = False
    try:
        with self.redis_client.pipeline() as pipe:
            pipe.pfadd(log_ip_key, ip)
            pipe.expire(log_ip_key, 691200)  # 8 days
            pipe.incr(rate_key, 1)
            pipe.expire(rate_key, 90000)  # 25 hours
            new_ip, _, limit_count, _ = pipe.execute()

        log_params = {
            "api_key_count": limit_count,
            "api_key_repeat_ip": new_ip == 0,
        }
        if maxreq:
            should_limit = limit_count > maxreq
            log_params["rate_quota"] = maxreq
            log_params["rate_remaining"] = max(0, maxreq - limit_count)
            log_params["rate_allowed"] = not should_limit
        bind_threadlocal(**log_params)
    except RedisError:
        self.raven_client.captureException()
    return should_limit
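# The pfadd calls above use a Redis HyperLogLog to count approximately-unique
# IPs per key per day without storing the IPs themselves. A minimal
# standalone sketch with redis-py; the key name and IP value are
# illustrative, and a local Redis server is assumed.
import redis

r = redis.Redis()

# pfadd returns 1 when the element changed the estimate (i.e. it is likely
# new), and 0 when it was almost certainly seen before.
print(r.pfadd("apiuser:locate:demo-key:2024-01-01", "203.0.113.7"))  # 1
print(r.pfadd("apiuser:locate:demo-key:2024-01-01", "203.0.113.7"))  # 0

# pfcount returns the approximate number of distinct elements added.
print(r.pfcount("apiuser:locate:demo-key:2024-01-01"))  # ~1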
def __call__(self):
    """Execute the view and return a response."""
    api_key = None
    api_key_text = self.parse_apikey()
    skip_check = False

    if api_key_text is None:
        self.log_count("none")
        if self.error_on_invalidkey:
            raise self.prepare_exception(InvalidAPIKey())

    if api_key_text is not None:
        try:
            api_key = get_key(self.request.db_session, api_key_text)
        except (DatabaseError, DBAPIError):
            # If we cannot connect to the backend DB, skip the API key check.
            skip_check = True
            self.raven_client.captureException()
            bind_threadlocal(
                api_key=api_key_text,
                api_path=self.metric_path,
                api_type=self.view_type,
                api_key_db_fail=True,
            )

    if api_key is not None:
        valid_key = api_key.valid_key
        if api_key.allowed(self.view_type):
            self.log_count(valid_key)

            # Potentially avoid the overhead of a Redis connection.
            if self.ip_log_and_rate_limit:
                if self.log_ip_and_rate_limited(valid_key, api_key.maxreq):
                    raise self.prepare_exception(DailyLimitExceeded())
        else:
            self.log_count("invalid")
            # Replace "invalid" with the real key, and add "api_key_allowed".
            bind_threadlocal(api_key=valid_key, api_key_allowed=False)
            if self.error_on_invalidkey:
                raise self.prepare_exception(InvalidAPIKey())
    elif skip_check:
        pass
    else:
        if api_key_text is not None:
            self.log_count("invalid")
            bind_threadlocal(invalid_api_key=api_key_text)
            if self.error_on_invalidkey:
                raise self.prepare_exception(InvalidAPIKey())

    # If we failed to look up an ApiKey, create an empty one
    # rather than passing None through.
    if api_key is None:
        api_key = Key()

    return self.view(api_key)
def run(self):
    clear_threadlocal()
    bind_threadlocal(slot=self._slot, cell_id=self._cell_infoset.fetch('.id'))

    workflow_log = self._workflow_log
    log.info('launching workflow')

    workflow_log.append(
        dict(action='lvc recovery', event='start', ts=time.time()))
    lvc_outcome = low_voltage_recovery(self._sess, self._slot, self._queue)
    workflow_log.append(
        dict(action='lvc recovery', event='end', outcome=lvc_outcome,
             ts=time.time()))

    if lvc_outcome['ok']:
        # Take the results from the LVC (if there are any)
        workflow_log.main_event['results'].update(
            lvc_outcome.get('results', {}))

        workflow_log.append(
            dict(action='capacity measure', event='start', ts=time.time()))
        mcap_outcome = measure_capacity(self._sess, self._slot, self._queue)
        workflow_log.append(
            dict(action='capacity measure', event='end',
                 outcome=mcap_outcome, ts=time.time()))

        if mcap_outcome['ok']:
            # Take the results from the capacity measurement (if there are any)
            workflow_log.main_event['results'].update(
                mcap_outcome.get('results', {}))
        else:
            log.warning('failed capacity measurement', outcome=mcap_outcome)
            self._cell_infoset.put('.props.tags.workflow_failure', True)
            self._cell_infoset.put(
                '.props.workflow_failure_outcome',
                dict(state_text=mcap_outcome['state_text'],
                     status_text=mcap_outcome['status_text']))
            status_text = mcap_outcome['status_text']
            if status_text in (StatusStrings.HOT_CHARGED,
                               StatusStrings.HOT_DISCHARGED):
                self._cell_infoset.put('.props.tags.excessive_heat', True)
    else:
        log.warning('failed low voltage recovery attempt',
                    outcome=lvc_outcome)
        self._cell_infoset.put('.props.tags.workflow_failure', True)
        # This branch runs before measure_capacity, so the failure outcome
        # must come from lvc_outcome (mcap_outcome is undefined here).
        self._cell_infoset.put(
            '.props.workflow_failure_outcome',
            dict(state_text=lvc_outcome['state_text'],
                 status_text=lvc_outcome['status_text']))
        self._cell_infoset.put('.props.tags.precharge_fail', True)

    log.info('workflow finished')
def __init__(
    self,
    fallback=None,
    ip=None,
    blue=None,
    cell=None,
    wifi=None,
    api_key=None,
    api_type=None,
    session=None,
    http_session=None,
    geoip_db=None,
):
    """
    A class representing a concrete query.

    :param fallback: A dictionary of fallback options.
    :type fallback: dict

    :param ip: An IP address, e.g. 127.0.0.1.
    :type ip: str

    :param blue: A list of bluetooth query dicts.
    :type blue: list

    :param cell: A list of cell query dicts.
    :type cell: list

    :param wifi: A list of wifi query dicts.
    :type wifi: list

    :param api_key: An ApiKey instance for the current query.
    :type api_key: :class:`ichnaea.models.api.ApiKey`

    :param api_type: The type of query API, for example `locate`.
    :type api_type: str

    :param session: An open database session.

    :param http_session: An open HTTP/S session.

    :param geoip_db: A geoip database.
    :type geoip_db: :class:`~ichnaea.geoip.GeoIPWrapper`
    """
    self.geoip_db = geoip_db
    self.http_session = http_session
    self.session = session

    self.fallback = fallback
    self.ip = ip
    self.blue = blue
    self.cell = cell
    self.wifi = wifi
    self.api_key = api_key
    if api_type not in (None, "region", "locate"):
        raise ValueError("Invalid api_type.")
    self.api_type = api_type

    bind_threadlocal(
        region=self.region,
        blue=len(blue or []),
        blue_valid=len(self.blue),
        cell=len(cell or []),
        cell_valid=len(self.cell),
        wifi=len(wifi or []),
        wifi_valid=len(self.wifi),
        has_geoip=bool(self.geoip),
        has_ip=bool(ip),
    )
def apply_gaussian_smoothing(self, timeseries_type, plot=False, smoothed_max_threshold=5):
    """
    Apply a rolling Gaussian window to smooth the data. The signature and
    return values match get_timeseries, but a subset of the input
    time-series is returned, starting at the first non-zero value.

    Parameters
    ----------
    timeseries_type: TimeseriesType
        Which type of time-series to use.
    plot: bool
        If True, plot smoothed and original data.
    smoothed_max_threshold: int
        Filter out entire series (e.g. NEW_DEATHS) that do not contain high
        enough numeric values. This accounts for low-level constant smoothed
        values having a disproportionate effect on the final R(t)
        calculation when all of their values are below this threshold.

    Returns
    -------
    dates: array-like
        Input data over a subset of indices available after windowing.
    times: array-like
        Output integers since the reference date.
    smoothed: array-like
        Gaussian smoothed data.
    """
    timeseries_type = TimeseriesType(timeseries_type)
    dates, times, timeseries = self.get_timeseries(timeseries_type)
    bind_threadlocal(timeseries_type=timeseries_type.value)

    # Hospitalizations have a strange effect in the first few data points
    # across many states. Just drop those.
    if timeseries_type in (
        TimeseriesType.CURRENT_HOSPITALIZATIONS,
        TimeseriesType.NEW_HOSPITALIZATIONS,
    ):
        dates, times, timeseries = dates[2:], times[2:], timeseries[2:]

    # Remove outliers before smoothing: replace a value if it is more than
    # 10 standard deviations from the 14-day trailing mean.
    timeseries = replace_outliers(pd.Series(timeseries))

    smoothed = (
        timeseries.rolling(
            self.window_size,
            win_type="gaussian",
            min_periods=self.kernel_std,
            center=True,
        )
        .mean(std=self.kernel_std)
        .round()
    )

    nonzeros = [idx for idx, val in enumerate(smoothed) if val != 0]
    if smoothed.empty:
        idx_start = 0
    elif max(smoothed) < smoothed_max_threshold:
        # Skip the entire array.
        idx_start = len(smoothed)
    else:
        idx_start = nonzeros[0]
    smoothed = smoothed.iloc[idx_start:]
    original = timeseries.loc[smoothed.index]

    if plot:
        plt.scatter(
            dates[-len(original):],
            original,
            alpha=0.3,
            label=timeseries_type.value.replace("_", " ").title() + " Shifted",
        )
        plt.plot(dates[-len(original):], smoothed)
        plt.grid(True, which="both")
        plt.xticks(rotation=30)
        plt.xlim(min(dates[-len(original):]), max(dates) + timedelta(days=2))
        plt.legend()

    return dates, times, smoothed
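# The smoothing step above is plain pandas. A self-contained sketch of the
# same rolling Gaussian mean; the window size, kernel std, and input values
# are illustrative, not the class defaults. win_type windows require SciPy.
import numpy as np
import pandas as pd

series = pd.Series(np.array([0, 0, 1, 3, 7, 12, 20, 18, 25, 30], dtype=float))

# A centered rolling window with Gaussian weighting; `std` controls the
# kernel width and `min_periods=1` lets the window edges produce values too.
smoothed = (
    series.rolling(5, win_type="gaussian", min_periods=1, center=True)
    .mean(std=2)
    .round()
)
print(smoothed.tolist())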
def __init__(
    self,
    fips,
    window_size=14,
    kernel_std=5,
    r_list=np.linspace(0, 10, 501),
    process_sigma=0.05,
    ref_date=datetime(year=2020, month=1, day=1),
    confidence_intervals=(0.68, 0.95),
    min_cases=5,
    min_deaths=5,
    include_testing_correction=True,
):
    # Param generation used for Xcor in align_time_series has some
    # stochastic FFT elements; seed for reproducibility.
    np.random.seed(InferRtConstants.RNG_SEED)

    self.fips = fips
    self.r_list = r_list
    self.window_size = window_size
    self.kernel_std = kernel_std
    self.process_sigma = process_sigma
    self.ref_date = ref_date
    self.confidence_intervals = confidence_intervals
    self.min_cases = min_cases
    self.min_deaths = min_deaths
    self.include_testing_correction = include_testing_correction

    if len(fips) == 2:  # State FIPS are 2 digits
        self.agg_level = AggregationLevel.STATE
        self.state_obj = us.states.lookup(self.fips)
        self.state = self.state_obj.name
        (
            self.times,
            self.observed_new_cases,
            self.observed_new_deaths,
        ) = load_data.load_new_case_data_by_state(
            self.state,
            self.ref_date,
            include_testing_correction=self.include_testing_correction,
        )
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = load_data.load_hospitalization_data_by_state(
            state=self.state_obj.abbr, t0=self.ref_date
        )
        self.display_name = self.state
    else:
        self.agg_level = AggregationLevel.COUNTY
        self.geo_metadata = (
            load_data.load_county_metadata().set_index("fips").loc[fips].to_dict()
        )
        self.state = self.geo_metadata["state"]
        self.state_obj = us.states.lookup(self.state)
        self.county = self.geo_metadata["county"]
        if self.county:
            self.display_name = self.county + ", " + self.state
        else:
            self.display_name = self.state
        (
            self.times,
            self.observed_new_cases,
            self.observed_new_deaths,
        ) = load_data.load_new_case_data_by_fips(
            self.fips,
            t0=self.ref_date,
            include_testing_correction=self.include_testing_correction,
        )
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = load_data.load_hospitalization_data(self.fips, t0=self.ref_date)

    clear_threadlocal()
    bind_threadlocal(Rt_Inference_Target=self.display_name)
    log.info("Running")

    self.case_dates = [ref_date + timedelta(days=int(t)) for t in self.times]
    if self.hospitalization_data_type:
        self.hospital_dates = [
            ref_date + timedelta(days=int(t)) for t in self.hospital_times
        ]

    self.default_parameters = ParameterEnsembleGenerator(
        fips=self.fips, N_samples=500, t_list=np.linspace(0, 365, 366)
    ).get_average_seir_parameters()

    # Serial period = incubation + 0.5 * infectious period.
    self.serial_period = (
        1 / self.default_parameters["sigma"]
        + 0.5 * 1 / self.default_parameters["delta"]
    )

    # If we only receive current hospitalizations, we need to account for
    # the outflow to reconstruct new admissions.
    if (
        self.hospitalization_data_type
        is load_data.HospitalizationDataType.CURRENT_HOSPITALIZATIONS
    ):
        los_general = self.default_parameters["hospitalization_length_of_stay_general"]
        los_icu = self.default_parameters["hospitalization_length_of_stay_icu"]
        hosp_rate_general = self.default_parameters["hospitalization_rate_general"]
        hosp_rate_icu = self.default_parameters["hospitalization_rate_icu"]
        icu_rate = hosp_rate_icu / hosp_rate_general
        flow_out_of_hosp = self.hospitalizations[:-1] * (
            (1 - icu_rate) / los_general + icu_rate / los_icu
        )
        # We are attempting to reconstruct the cumulative hospitalizations.
        self.hospitalizations = np.diff(self.hospitalizations) + flow_out_of_hosp
        self.hospital_dates = self.hospital_dates[1:]
        self.hospital_times = self.hospital_times[1:]

    self.log_likelihood = None