def __connection_is_valid(self) -> bool:
    """Checks if the current database connection is valid."""
    if not self.__is_open():
        logger.info("Database connection is closed!")
        return False
    return True

def query_timed(self, url: str) -> Dict[str, Any]:
    start_time: float = time.time()
    response: Dict[str, Any] = self.__get(url)
    duration_sec: float = time.time() - start_time
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("OSM query took ~%ss: %s", int(duration_sec), url)
    else:
        logger.info("OSM query took ~%ss: url of %s chars", duration_sec, len(url))
    return response

def query_pd(self, query: str, *argv, **kwargs) -> pd.DataFrame:
    """Queries the database and returns the result as a DataFrame."""
    logger.info(f'Querying SQL with {query}')
    if not self.__connection_is_valid():
        logger.info("Trying to reconnect...")
        self.connect()
    return pd.read_sql(query, self.__db, *argv, **kwargs)

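# A minimal usage sketch for the wrapper methods above; `db` is a hypothetical
# instance of the enclosing connection class and `some_uuid` a placeholder:
#
#   db.connect()
#   frame = db.query_pd(f"SELECT * FROM getDeviceInformationSingle('{some_uuid}')")
#   cursor = db.query("SELECT 1")
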
@contextmanager  # assumed from contextlib - required for the `with timer(...)` usage below
def timer(message):
    """Context manager for reporting time measurements"""
    tic = time.perf_counter()
    extra = ""
    try:
        yield
    except Exception:
        extra = " (failed)"
        raise
    finally:
        toc = time.perf_counter()
        ms = int(1000 * (toc - tic))
        logger.info(f"{message}{extra}: {ms}ms")

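# Usage sketch for timer, mirroring how load_device_info below wraps its
# database calls; logs e.g. "db connect: 42ms" on success and
# "db connect (failed): 42ms" if an exception escapes the block:
#
#   with timer("db connect"):
#       db = connect_to_azure_database()
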
def _compute_graph_edges(self):
    """Computes contacts and adds them as edges to the graph."""
    glue_below_duration = self.params['bt_glue_below_duration']
    min_duration = self.params['bt_min_duration']
    timeFrom = self.params['timeFrom']
    timeTo = self.params['timeTo']
    outlier_threshold = self.params['bt_outlier_threshold']
    bt_data = self._bt_data

    logger.info("Building Bluetooth contact graph edges")
    for uuid1 in self.query_uuids:
        trajectory_uuid1 = self._load_trajectory(uuid1)
        for uuid2 in tqdm(self.uuids):
            if uuid1 == uuid2:
                continue
            # Extract the relevant part of the pandas frame (pair in either direction)
            bt_data_local = bt_data.loc[
                ((bt_data['uuid'] == uuid1) & (bt_data['paireddeviceid'] == uuid2))
                | ((bt_data['paireddeviceid'] == uuid1) & (bt_data['uuid'] == uuid2))]
            if len(bt_data_local) == 0:
                # No contacts
                continue
            trajectories = {
                uuid1: trajectory_uuid1,
                uuid2: self._load_trajectory(uuid2)
            }
            # Construct contact list
            t1 = trajectories[uuid1]
            t2 = trajectories[uuid2]
            bt_iterator = BluetoothContactDetailsIterator(bt_data_local,
                                                          glue_below_duration)
            contacts = ContactList([
                BluetoothContact(t1, t2, contact_details)
                for contact_details in bt_iterator
            ])
            contacts = contacts.filter(min_duration=min_duration)
            self._add_contacts(uuid1, uuid2, contacts)
    logger.info("Finished building Bluetooth contact graph edges")

def log_contacts(uuid, contacts, device_info, add_random_salt=False):
    """
    Logs a list [contact1_dict, contact2_dict, ...] where each entry
    corresponds to one contact.

    :param uuid: Patient uuid (string)
    :param contacts: contacts argument of a ContactGraphResult object
    :param device_info: dict of uuid -> list of device info tuples
    :param add_random_salt: If True, adds a random combination of letters in
        front of uuids before hashing, where the combination changes on each
        call of the function.
    """
    contacts_list = contacts_to_dicts(uuid,
                                      contacts,
                                      add_random_salt=add_random_salt,
                                      device_info=device_info)
    for contact in contacts_list:
        logger.info(contact)

def contacts_with(self, uuid):
    """
    Returns all contacts with a given uuid.

    :param uuid: The uuid of interest
    :return: A ContactGraphResult holding (uuid1, uuid2, ContactList) tuples
    """
    if uuid in self.uuids:
        contacts = [(uuid1, uuid2, edge["contact_list"])
                    for uuid1, uuid2, edge in self._G.edges(uuid, data=True)]
        return ContactGraphResult(uuid, contacts)
    logger.info(
        f"{self.__class__.__name__}: No contacts for {uuid} - returning empty result")
    return ContactGraphResult(uuid, [])

def nested_get(dictionary, key, debug=False):
    '''Get value from nested dictionary'''
    assert isinstance(key, tuple)
    if debug:
        logger.info(f'Get {key} {list(dictionary.keys())}')
    key0, *keys = key
    if not keys:
        return dictionary[key0]
    return nested_get(dictionary[key0], tuple(keys), debug)

def _load_bt_data(self):
    # Load data from database
    assert len(self.query_uuids) == 1  # FIXME: support multiple query uuids
    query_uuid = self.query_uuids[0]
    timeFrom = self.params['timeFrom']
    timeTo = self.params['timeTo']
    dt_threshold = self.params['bt_dt_threshold']
    logger.info(
        f"BTContactGraph: Loading BT contacts from SQL server for uuid {query_uuid}.")
    self._bt_data = load_azure_data_bluetooth(query_uuid,
                                              timeFrom,
                                              timeTo,
                                              dt_threshold=dt_threshold)
    logger.info("BTContactGraph: Finished loading BT contacts from SQL server")

def query(self, query: str, *argv, **kwargs) -> pyodbc.Cursor:
    """Queries the database and returns the result as a Cursor."""
    logger.info(f'Querying SQL with {query}')
    if not self.__connection_is_valid():
        logger.info("Trying to reconnect...")
        self.connect()
    cursor = self.__db.cursor()
    try:
        cursor.execute(query, *argv, **kwargs)
    except Exception as e:
        logger.error(f'Error querying SQL with {query} | {e}')
        raise
    return cursor

def nested_set(dictionary, key, value, debug=False):
    '''Set value of nested dictionary'''
    assert isinstance(key, tuple)
    if debug:
        logger.info(f'Set {key} {list(dictionary.keys())}')
    key0, *keys = key
    if not keys:
        dictionary[key0] = value
        return dictionary
    return nested_set(dictionary[key0], tuple(keys), value, debug)

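# Usage sketch for nested_get/nested_set with illustrative values:
#
#   d = {'a': {'b': {'c': 1}}}
#   nested_get(d, ('a', 'b', 'c'))     # -> 1
#   nested_set(d, ('a', 'b', 'c'), 2)  # d is now {'a': {'b': {'c': 2}}}
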
def __init__(self, query_uuids, params):
    """
    Constructs a contact graph for a list of query uuids.

    :param query_uuids: A list of uuids to query
    :param params: A parameter dictionary used for the graph computations
    """
    logger.info("Building contact graph")
    self.query_uuids = query_uuids
    self.params = params
    self._G = nx.Graph()
    self._compute_graph_nodes()
    self._compute_graph_edges()
    # Collect device info for all participant uuids
    self.node_device_info = load_device_info(list(self._G.nodes))
    logger.info("Finished building contact graph")

def set_analysis_period(params, timeFrom, timeTo):
    """Sets params["timeFrom"] and params["timeTo"]."""
    if timeTo is not None:
        params["timeTo"] = timeTo
    else:
        params["timeTo"] = datetime.utcnow().replace(microsecond=0)

    if timeFrom is not None:
        params["timeFrom"] = timeFrom
        params["analysis_duration_in_days"] = (params['timeTo'] -
                                               params['timeFrom']).days
    else:
        params["timeFrom"] = (params["timeTo"] - timedelta(
            days=params["analysis_duration_in_days"])).replace(microsecond=0)

    # Ensure that we work in the UTC timezone - this should not really be
    # necessary if we work with time-aware datetime objects, but let's be on
    # the safe side
    params["timeFrom"] = params["timeFrom"].astimezone(timezone.utc)
    params["timeTo"] = params["timeTo"].astimezone(timezone.utc)
    logger.info(f"Analysis period: {params['timeFrom'].isoformat()} - "
                f"{params['timeTo'].isoformat()}")

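# Usage sketch for set_analysis_period; the params dict here is a hypothetical
# stand-in that already carries a default analysis duration:
#
#   params = {"analysis_duration_in_days": 14}
#   set_analysis_period(params, timeFrom=None, timeTo=None)
#   # params["timeFrom"]/params["timeTo"] now span the last 14 days, in UTC
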
def query_multiple_mt(self, queries, wrapper) -> List[Dict[str, Any]]:
    ordered_output: List[Dict[str, Any]] = [{}] * len(queries)
    success: int = 0
    logger.info("Starting %s OSM requests...", len(queries))
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.max_workers) as executor:
        futures = {}
        for index, url in enumerate(queries):
            futures[executor.submit(partial(self.__get, wrapper(url)))] = index
        for future in concurrent.futures.as_completed(futures):
            try:
                response = future.result()
                ordered_output[futures[future]] = response
                success += 1
            except Exception as ex:
                if self.verbose > 0:
                    logger.warning(ex)
    logger.info("Successfully processed %s / %s OSM requests.", success,
                len(queries))
    return ordered_output

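# Usage sketch for query_multiple_mt; `client` is a hypothetical instance of
# the enclosing OSM client and `urls` a list of prebuilt query URLs. `wrapper`
# is whatever callable turns each entry into the final request URL (the
# identity here):
#
#   responses = client.query_multiple_mt(urls, wrapper=lambda url: url)
#   # responses[i] corresponds to urls[i]; failed requests leave {} in place
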
def load_device_info(uuids):
    '''Return a dictionary of uuid -> list of device info tuples'''
    if not __CONFIG__.features.device_info:
        logger.info("Loading device info is disabled in config.")
        return defaultdict(list)

    logger.info("Loading device info")
    if isinstance(uuids, str):
        uuids = (uuids, )
    uuids = set(uuids)

    device_info = defaultdict(list)
    query_template = "SELECT * FROM getDeviceInformationSingle('%s')"
    with timer("db connect"):
        db = connect_to_azure_database()
    for uuid in uuids:
        query = query_template % uuid
        with timer("db query getDeviceInformationSingle"):
            frame = pd.read_sql_query(query, con=db)
        # NOTE: it seems there are some different conventions for naming,
        # e.g. ios10.1 and ios101 are (probably) the same thing and we might
        # want to merge these
        if frame is not None:
            device_info[uuid].extend(
                zip(frame['platform'], frame['model'], frame['appversion']))
    db.close()

    logger.info("Finished loading device info")
    return device_info

def _compute_graph_edges(self, dist_function=convolution):
    """
    Constructs the edges of the graph by computing the contacts of each
    trajectory pair. The distance function is user-specified.
    """
    dist_func_options = self.params['filter_options']
    allowed_jump = self.params['allowed_jump']
    hard_time_gap = self.params['max_interpol_in_h']
    glue_below_duration = self.params['glue_below_duration']
    min_duration = self.params['min_duration']

    logger.info("Building GPS contact graph edges")
    # Loop over trajectory pairs
    for uuid1 in self.query_uuids:
        if uuid1 in self._trajectories:
            t1 = self._trajectories[uuid1]
            for uuid2 in tqdm(self.uuids):
                if uuid2 == uuid1:
                    # Contact with oneself is not relevant
                    continue
                # Find contacts and add them to the graph
                t2 = self._trajectories[uuid2]
                contacts = get_gps_contacts_from_trajectories(
                    t1, t2, allowed_jump, hard_time_gap, glue_below_duration,
                    dist_function, dist_func_options)
                contacts = contacts.filter(min_duration=min_duration)
                if len(contacts) > 0:
                    self._add_contacts(uuid1, uuid2, contacts)
        else:
            logger.info("No trajectory corresponds to uuid %s", uuid1)
    logger.info("Finished building GPS contact graph edges")

def query_hits_partial_trajectory(
        self, query_type_names: Iterable[str],
        bounding_boxes: List[BoundingBox], containing_box: BoundingBox,
        query_types: Iterable[QueryType],
        type_queries: Iterable[str]) -> List[Dict[str, Any]]:
    """
    Queries for the POIs in the containing box, then uses the bounding_boxes
    (originally submitted) to filter away the POIs that fell outside of the
    original boxes. A possible CPU bottleneck, so processing time is logged.

    :param query_type_names: Query types by name
    :param bounding_boxes: Bounding boxes originally submitted
    :param containing_box: Bounding box containing all the bounding boxes
    :param query_types: QueryType objects
    :param type_queries: The OSM queries denoting POI types and their
        node/way/relation setup
    :return: A list with one dict of matching elements per
        (bounding box, query type) pair
    """
    url = self.__global_query_wrapper(containing_box, type_queries)
    loggable_types = ", ".join(query_type_names)
    logger.info(
        f"OSM: trajectory[{len(bounding_boxes)}]: {loggable_types}: {containing_box.area_str()}")
    logger.debug(f"OSM: {url}")
    results = self.query_timed(url)

    start_time: float = time.time()
    elements = results['elements']
    bounding_box_hits = [
        box.contained_elements(elements) for box in bounding_boxes
    ]
    query_typed_hits_per_box = []
    for box_hits in bounding_box_hits:
        for query_type in query_types:
            query_typed_hits_per_box.append(
                dict(elements=query_type.matching_elements(box_hits)))
    duration_sec: float = time.time() - start_time
    logger.info("%s elements from %s boxes -> %s processed in %sms",
                len(elements), len(bounding_boxes), str(containing_box),
                int(1000 * duration_sec))
    return query_typed_hits_per_box

def connect(self) -> None:
    """
    Connects to the database. Will reuse the connection if a connection is
    open and the connection string has not changed.
    """
    logger.info("Connecting to database...")
    if self.__db is not None and self.__connection_is_valid():
        logger.info("A connection is already open! Reusing old connection.")
        return
    try:
        self.__db = pyodbc.connect(self.__connection_string)
        logger.info("Database connection successful!")
    except Exception as e:
        logger.error(f"Database connection failed! | {e}")
        raise

def _load_trajectory(self, uuid):
    """Loads the GPS trajectory for a uuid over the analysis period."""
    params = self.params
    dt_threshold = params['gps_dt_threshold']
    dx_threshold = params['gps_dx_threshold']
    query = (f"SELECT * FROM getTrajectorySpeed("
             f"'{uuid}','{params['timeFrom']}','{params['timeTo']}')")
    logger.info("BTContactGraph: Calling getTrajectorySpeed() for BT contact.")
    df = load_azure_data(query,
                         params['outlier_threshold'],
                         dt_threshold=dt_threshold,
                         dx_threshold=dx_threshold).get(uuid, None)
    logger.info("BTContactGraph: Parsing trajectory for BT contact")
    trajectory = TrajectoryParser(pd_frame=df, uuid=uuid, verbose=0)
    logger.info("Finished getTrajectorySpeed() and parsing trajectory for BT contact.")
    return trajectory

def query_points_batched(self,
                         points: List[List[float]],
                         query_type_names: List[str],
                         distances,
                         element_types=None,
                         mt_split=True,
                         mt_threshold=0) -> List[Dict[str, Any]]:
    """
    Queries OSM for POIs around a batch of points, splitting large
    trajectories into sub-queries and optionally multithreading them.
    TODO: this method needs some cleaning up.

    :param points: The points, as a list of pairs of coordinates
    :param query_type_names: Query types, as in the _QUERY_TYPES_LIST dict.
        Types of POIs to retrieve
    :param distances: Accuracy/distances for each point. Can be an integer,
        which sets the same distance for every point
    :param element_types: The types of OSM structures to retrieve
    :param mt_split: Whether split sub-queries may be run in parallel threads
    :param mt_threshold: Minimum split count before multithreading is used
        (combined with the instance default)
    :return: A list of dicts of matching elements, one per
        (bounding box, query type) pair
    """
    query_types: Iterable[QueryType] = [
        _QUERY_TYPES_INDEX[query_type] for query_type in query_type_names
    ]
    trajectory_length = len(points)
    if isinstance(distances, int):
        distances = [distances] * trajectory_length

    # The OSM query denoting POI types and their node/way/relation setup
    type_queries: Iterable[str] = self.__type_queries(element_types, query_types)

    # Decide on multithreading parameters
    resolved_mt_threshold = max(self.batched_mt_threshold, mt_threshold)
    resolved_mt = (resolved_mt_threshold > 0) and self.batched or mt_split

    # Compute the bounding boxes and the containing bounding box
    bounding_boxes, containing_box = self.__bounding_boxes(points, distances)
    sqkm = containing_box.sqkm()
    if trajectory_length > 100 or sqkm > 10.0:
        # We need to split the box. (All hard-coded parameters here are
        # candidates for configuration.)
        split_count = max(
            2, int(min(trajectory_length / 2, trajectory_length / 50)))
        logger.info(
            f"Trajectory[{trajectory_length}] spans {containing_box.area_str()}, split in {split_count}")
        trajectory_splits = np.array_split(points, split_count)
        distances_splits = np.array_split(distances, split_count)
        query_hits_splits = []
        if resolved_mt and split_count > resolved_mt_threshold:
            # We want to multi-thread to exploit the OSM capacity
            ordered_output: List[Dict[str, Any]] = [{}] * split_count
            success: int = 0
            logger.info(f"Starting {split_count} OSM sub-threads...")
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=self.max_workers) as executor:
                futures = {}
                for index in range(split_count):
                    futures[executor.submit(
                        partial(self.subquery, trajectory_splits[index],
                                distances_splits[index], query_type_names,
                                query_types, type_queries))] = index
                for future in concurrent.futures.as_completed(futures):
                    try:
                        response = future.result()
                        ordered_output[futures[future]] = response
                        success += 1
                    except Exception as ex:
                        logger.warning("Failed to query OSM: %s", ex)
            logger.info(
                f"Successfully processed {success:d} / {split_count:d} OSM requests.")
            for output in ordered_output:
                query_hits_splits.extend(output)
            return query_hits_splits
        # Run the boxes in sequence, avoiding the overhead of multithreading
        for i in range(split_count):
            hits = self.subquery(trajectory_splits[i], distances_splits[i],
                                 query_type_names, query_types, type_queries)
            query_hits_splits.extend(hits)
        return query_hits_splits
    # Run the whole box in one go
    return self.query_hits_partial_trajectory(query_type_names,
                                              bounding_boxes, containing_box,
                                              query_types, type_queries)

def to_dict_daily(self):
    """
    Returns a dictionary representation of the report where contacts are
    aggregated on a daily basis.

    :return: A dictionary of the form:
        {<<uuid of contact>>: {
            "bluetooth_cumulative_risk_score": 10.0,
            "gps_cumulative_risk_score": 10.0,
            "categorical_risk": "medium",
            "bluetooth_cumulative_duration": 210.0,
            "gps_cumulative_duration": 210.0,
            "number_of_contacts": 3,
            "points_of_interest": "residential, school",
            "2020-04-10": {
                'gps_contacts': <<output of to_dict() call on gps contact list>> (if existing),
                'bluetooth_contacts': <<output of to_dict() call on BT contact list>> (if existing),
                'bar_plot': <<bar plot containing summary of gps and bluetooth contact details>>
            },
            "2020-04-11": {
                ...
            }
         },
         <<uuid of contact>>: {
            "bluetooth_cumulative_risk_score": 10.0,
            ...
         },
         ...
        }
    """
    # Create a default dict where we can append without creating keys
    dic = nested_dict()
    n = 0
    N = len(self.contacts.keys())
    for (_, uuid2), contact_list in self.contacts.items():
        logger.info(f"Generating report {n+1}/{N}")
        n += 1
        if not contact_list.include_in_report() and not self.testing:
            logger.info("Contact does not match the FHI requirements... skipping")
            continue
        logger.info("Contact matches the FHI requirements... adding to report")
        gps_contacts = contact_list.filter(contact_type="gps")
        bt_contacts = contact_list.filter(contact_type="bluetooth")
        dic[uuid2]["cumulative"]["all_contacts"] = contact_list.to_dict(
            include_individual_contacts=False, include_bar_plot=True)
        dic[uuid2]["cumulative"]['gps_contacts'] = gps_contacts.to_dict(
            include_individual_contacts=False, include_hist=True)
        dic[uuid2]["cumulative"]['bt_contacts'] = bt_contacts.to_dict(
            include_individual_contacts=False)

        daily_contacts = contact_list.split_by_days()
        for day, contact_list_day in daily_contacts.items():
            # After splitting we need to check again that all contacts have
            # the required min_duration
            gps_contacts_day = contact_list_day.filter(
                contact_type="gps", min_duration=params["min_duration"])
            bt_contacts_day = contact_list_day.filter(
                contact_type="bluetooth",
                min_duration=params["bt_min_duration"])
            all_contacts_day = ContactList(gps_contacts_day + bt_contacts_day)
            day_key = day.isoformat()
            dic[uuid2]['daily'][day_key]['all_contacts'] = all_contacts_day.to_dict(
                include_individual_contacts=False,
                include_bar_plot=False,
                include_summary_plot=self.include_maps)
            dic[uuid2]['daily'][day_key]['gps_contacts'] = gps_contacts_day.to_dict(
                include_individual_contacts=False, include_hist=True)
            dic[uuid2]['daily'][day_key]['bt_contacts'] = bt_contacts_day.to_dict(
                include_individual_contacts=False)

        # Enrich the cumulative info for uuid2 by how many days uuid2 was in
        # contact with the patient
        daily = dic[uuid2]['daily']
        gps_days = set(day for day in daily
                       if daily[day]['gps_contacts']['number_of_contacts'])
        bt_days = set(day for day in daily
                      if daily[day]['bt_contacts']['number_of_contacts'])
        contact_days = gps_days | bt_days
        dic[uuid2]['cumulative']['all_contacts']['days_in_contact'] = len(contact_days)
        dic[uuid2]['cumulative']['gps_contacts']['days_in_contact'] = len(gps_days)
        dic[uuid2]['cumulative']['bt_contacts']['days_in_contact'] = len(bt_days)

    # defaultdict to defaultdict
    dic = fhi_filter_dict(dic)

    # Sign it off
    # NOTE: version info should be top level, but perhaps too many things on
    # our as well as FHI's side depend on the assumption that dic.keys()
    # contains only uuids
    for uuid in dic.keys():
        dic[uuid]['version_info']['pipeline'] = corona.__VERSION__
        dic[uuid]['version_info']['device'] = self.device_info[uuid]

    # NOTE: at this point the ordering of entries in dic is not guaranteed
    # to be the same as in the original dic (where events were ordered by time)
    dic = default_to_regular(dic)

    # Sort daily entries:
    # {uuid: {'cumulative': ...,
    #         'daily': {'date0': x,
    #                   'date1': y}}}
    as_date = lambda string: datetime.datetime.strptime(string, '%Y-%m-%d')
    # Ensure ordering of dates
    uuids = list(dic.keys())
    for uuid in uuids:
        dates = dic[uuid]['daily'].keys()  # As strings
        dic[uuid]['daily'] = {
            date: dic[uuid]['daily'][date]
            for date in sorted(dates, key=as_date)
        }
    return dic

def isolate_events(events, debug=False):
    '''A list of pandas rows (events) is isolated to produce new events'''
    # NOTE: after isolation the events should be 'isolated'
    if len(events) == 1:
        return [events[0]]

    # This is the work horse
    A, B, rest = events[0], events[1], events[2:]
    if debug:
        logger.info('<%g---(A)--%g>' % (e_start(A), e_end(A)))
        logger.info('<%g---(B)--%g>' % (e_start(B), e_end(B)))

    if e_is_identical(A, B):
        if debug:
            logger.info('Identity\n')
        new_event = [
            e_uuid(A),  # These are determined by the enclosing
            e_pd(A),    # event
            e_start(A),
            e_dur(A),
            # Focus on the worst-case scenario, so pick the longest *_duration
            max(e_vcd(A), e_vcd(B)),
            max(e_cd(A), e_cd(B)),
            max(e_rcd(A), e_rcd(B))
        ]
        return isolate_events([new_event] + rest)

    if e_no_overlap(A, B):
        if debug:
            logger.info('!Overlap\n')
        return [A] + isolate_events([B] + rest)

    if e_contains(A, B):
        if debug:
            logger.info('Contains\n')
        new_event = [
            e_uuid(A),  # These are determined by the enclosing
            e_pd(A),    # event
            e_start(A),
            e_dur(A),
            # Focus on the worst-case scenario, so pick the longest *_duration
            max(e_vcd(A), e_vcd(B)),
            max(e_cd(A), e_cd(B)),
            max(e_rcd(A), e_rcd(B))
        ]
        return isolate_events([new_event] + rest)

    # Join the events
    if right_overlaps(A, B):
        if debug:
            logger.info('Overlap\n')
        # The new event has the start of A and the end of B
        start_A, start_B = map(e_start, (A, B))
        end_A, end_B = map(e_end, (A, B))
        dur_A, dur_B = map(e_dur, (A, B))
        # SA<------->EA
        #      SB<--------->EB
        overlap = end_A - start_B
        # Combine a quantity over the non-overlapping parts of A and B at
        # their own rates, charging the worst-case rate over the overlap
        combine = lambda qA, qB: (qA / dur_A * (dur_A - overlap) +
                                  max(qA / dur_A, qB / dur_B) * overlap +
                                  qB / dur_B * (dur_B - overlap))
        event = [
            e_uuid(A), e_pd(A), start_A, end_B - start_A,
            combine(e_vcd(A), e_vcd(B)),
            combine(e_cd(A), e_cd(B)),
            combine(e_rcd(A), e_rcd(B))
        ]
        return isolate_events([event] + rest)

    raise ValueError('This should not happen', A, B)

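# Worked example (illustrative numbers) for the overlap combination above:
# let A span [0, 10] (dur_A = 10) with quantity qA = 10, and B span [6, 16]
# (dur_B = 10) with qB = 20, so overlap = 10 - 6 = 4. Then
#
#   combine(10, 20) = 1.0 * (10 - 4) + max(1.0, 2.0) * 4 + 2.0 * (10 - 4)
#                   = 6 + 8 + 12 = 26
#
# i.e. each event contributes at its own rate outside the overlap, while the
# worst-case (higher) rate is charged over the shared interval.
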
def _get_trajectories(self):
    assert len(self.query_uuids) == 1  # FIXME: support multiple query uuids
    query_uuid = self.query_uuids[0]
    params = self.params
    # Load data from database
    dt_threshold = params['gps_dt_threshold']
    dx_threshold = params['gps_dx_threshold']

    # Now get the trajectory of the patient
    query = (f"SELECT * FROM getTrajectorySpeed("
             f"'{query_uuid}','{params['timeFrom']}','{params['timeTo']}')")
    logger.info("GPSContactGraph: Calling getTrajectorySpeed() for GPS contact")
    t_patient = load_azure_data(query,
                                params['outlier_threshold'],
                                dt_threshold=dt_threshold,
                                dx_threshold=dx_threshold).get(query_uuid, [])
    logger.info("GPSContactGraph: getTrajectorySpeed() for GPS contact finished")

    minimum_duration = 60
    maximum_bb_diameter1 = 800
    maximum_bb_duration1 = 3 * 60
    maximum_bb_diameter2 = 200
    t_split = GPSContactGraph._bounding_boxes_greedy_(
        t_patient, minimum_duration, maximum_bb_diameter1,
        maximum_bb_duration1, maximum_bb_diameter2)
    logger.info(f"GPSContactGraph: Split trajectory into {len(t_split)} segments")

    # Get other trajectories using the bounding box method
    logger.info("GPSContactGraph: Getting other trajectories "
                "(using bounding boxes) for GPS contacts")
    t = {}
    temp_trajectories = []
    for t_piece in t_split:
        timeFrom = datetime.datetime.utcfromtimestamp(
            t_piece['time'].min()).strftime('%Y-%m-%d %H:%M:%S')
        timeTo = datetime.datetime.utcfromtimestamp(
            t_piece['time'].max()).strftime('%Y-%m-%d %H:%M:%S')
        logger.info("GPSContactGraph: Dealing with time window: {0} - {1}".format(
            timeFrom, timeTo))
        lat_min = t_piece['latitude'].min()
        lat_max = t_piece['latitude'].max()
        long_min = t_piece['longitude'].min()
        long_max = t_piece['longitude'].max()
        query = (f"SELECT * FROM getWithinBB ({long_min},{lat_min},{long_max},{lat_max},"
                 f"'{timeFrom}','{timeTo}') ORDER BY 1,2 ASC")
        # Appends a dictionary of format {uuid: pd_frame} to the list
        temp_trajectories.append(
            load_azure_data(query,
                            params['outlier_threshold'],
                            dt_threshold=dt_threshold,
                            dx_threshold=dx_threshold))

    # Combine data frames of temporary trajectories
    for temp_trajectory in temp_trajectories:
        for key in temp_trajectory.keys():
            if key in t:
                # Concatenate pd frames
                t[key] = pd.concat([t[key], temp_trajectory[key]])
            else:
                t[key] = temp_trajectory[key]

    # Finally, rebase the indexes of the pandas frames (these are not consistent now)
    for key in t.keys():
        t[key] = t[key].reset_index()
    logger.info("GPSContactGraph: Getting other trajectories "
                "(using bounding boxes) for GPS contacts finished")
    logger.info(f"GPSContactGraph: Found GPS contacts with {len(t)} people.")

    logger.info("GPSContactGraph: Parsing trajectories of GPS contacts")
    trajectories = {}
    for uuid, df in t.items():
        trajectories[uuid] = TrajectoryParser(pd_frame=df, uuid=uuid, verbose=0)
    return trajectories

def run_analysis_pipeline(patient_uuid,
                          output_formats=["dict"],
                          daily_summary=True,
                          timeFrom=None,
                          timeTo=None,
                          request_id=None,
                          html_filename_prefix="",
                          include_maps="static",
                          testing=False):
    """
    Runs the analysis pipeline and returns the risk report.

    :param patient_uuid: UUID of the patient to be analysed
    :param output_formats: The output formats of the report. Valid options
        are "dict", "html" and "stdout".
    :param daily_summary: If True, the contacts are aggregated on a daily
        basis, otherwise each contact is reported independently.
    :param timeFrom: The start time of the analysis. If None, the pipeline
        runs the analysis for the last days as specified in
        default_parameters.py.
    :param timeTo: The end time of the analysis. If None, UTC now will be used.
    :param request_id: Optional id used as the logging thread/context name
    :param html_filename_prefix: Only valid if output_formats includes "html".
        A prefix string for the filename.
    :param include_maps: Specifies which types of maps to include in the
        report. Valid options are None, "static" or "interactive".
    :param testing: Boolean flag - if True, also reports contacts that do not
        satisfy the criteria defined by FHI
    """
    context_name = str(request_id) if request_id else str(patient_uuid)
    calling_thread = current_thread()
    calling_thread_name = calling_thread.name
    calling_thread.name = context_name
    try:
        # Set parameters
        assert set(output_formats).issubset(("dict", "html", "stdout"))
        patient_uuid = patient_uuid.lower()  # UUIDs are always lowercase by convention
        set_analysis_period(params, timeFrom, timeTo)
        logger.info("Running analysis pipeline with following parameters and config (extracts): "
                    f"Params={json.dumps(params, default=str)} "
                    f"Config={json.dumps(config.loggable_params(), default=str)}")

        # We only render matplotlib images, so we can use the agg backend.
        matplotlib.use('Agg')

        # Build contact graphs
        gps_contact_graph = GPSContactGraph([patient_uuid], params)
        bt_contact_graph = BTContactGraph([patient_uuid], params)

        # Extract contact results
        gps_results = gps_contact_graph.contacts_with(patient_uuid)
        bt_results = bt_contact_graph.contacts_with(patient_uuid)
        all_results = bt_results + gps_results

        # Gather device info of uuids in the graphs
        device_info = gps_contact_graph.node_device_info.copy()
        device_info.update(bt_contact_graph.node_device_info)

        # Log all contacts anonymously
        log_contacts(patient_uuid, all_results.contacts, device_info,
                     add_random_salt=True)

        # Create report
        report = RiskReport(patient_uuid, all_results.contacts, device_info,
                            include_maps, testing)

        # Return the report in the requested format(s)
        if "html" in output_formats:
            filename = (f"{html_filename_prefix}report_{patient_uuid}"
                        f"_dist_thresh_{params['filter_options']['dist_thresh']}.html")
            report.to_html(filename, daily_summary)
            logger.info("Analysis pipeline finished")
        if "stdout" in output_formats:
            logger.info(report)
            logger.info("Analysis pipeline finished")
        if "dict" in output_formats:
            d = report.to_dict_daily() if daily_summary else report.to_dict()
            logger.info("Analysis pipeline finished")
            return d
    finally:
        calling_thread.name = calling_thread_name

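# Usage sketch for run_analysis_pipeline; the uuid is a hypothetical placeholder:
#
#   report_dict = run_analysis_pipeline("00000000-0000-0000-0000-000000000000",
#                                       output_formats=["dict"],
#                                       daily_summary=True)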