def store(data: OrderedDict, input_streams: dict, output_streams: dict,
          metadata, CC_obj: CerebralCortex, config: dict):
    """
    Store diagnostic results with their metadata in the data-store
    :param data:
    :param input_streams:
    :param output_streams:
    :param metadata:
    :param CC_obj:
    :param config:
    """
    if data:
        # basic output stream info
        owner = input_streams[0]["owner_id"]
        dd_stream_id = output_streams["id"]
        dd_stream_name = output_streams["name"]
        stream_type = "ds"

        data_descriptor = metadata["dd"]
        execution_context = metadata["ec"]
        annotations = metadata["anno"]

        ds = DataStream(identifier=dd_stream_id, owner=owner,
                        name=dd_stream_name, data_descriptor=data_descriptor,
                        execution_context=execution_context,
                        annotations=annotations, stream_type=stream_type,
                        data=data)
        CC_obj.save_datastream(ds, "datastream")
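# A minimal sketch of the inputs `store` expects, inferred from the accesses
# above (all values here are hypothetical placeholders):
#
#     input_streams = [{"owner_id": owner_id, "id": raw_stream_id, "name": stream_name}]
#     output_streams = {"id": dd_stream_id, "name": dd_stream_name}
#     metadata = {"dd": data_descriptor, "ec": execution_context, "anno": annotations}
#     store(merged_windows, input_streams, output_streams, metadata, CC_obj, config)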
def audit_user_streams(user_id, all_days, cc_config):
    print('X' * 100, cc_config)
    CC = CerebralCortex(cc_config)
    all_user_streams = CC.get_user_streams(user_id)
    userbuf = ''
    for user_stream_key in all_user_streams:
        user_stream = all_user_streams[user_stream_key]

        if 'analysis' not in user_stream['name']:
            continue

        for day in all_days:
            data_points = 0
            for stream_id in user_stream['stream_ids']:
                ds = CC.get_stream(stream_id, user_id, day)
                data_points += len(ds.data)

            buf = '%s\t%s\t%s\t%d\n' % (user_id, user_stream['name'],
                                        str(day), data_points)
            userbuf += buf

    out_dir = '/tmp/data_audit'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    file_path = os.path.join(out_dir, user_id)
    with open(file_path, 'w') as f:
        f.write(userbuf)
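# Each audit line is tab-separated: user id, stream name, day, and the number
# of data points counted across that stream's ids for the day. For example
# (values hypothetical):
#
#     <user_id>	<stream_name>	20171206	8640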
def get_cc(hostname):
    """
    Builds an instance of CerebralCortex to suit a known environment.

    Args:
        hostname (str): The hostname of the machine calling for the
            CerebralCortex instance.
    Returns:
        cc (CerebralCortex): An instance of CerebralCortex configured for the
            host machine.
    """
    from cerebralcortex.cerebralcortex import CerebralCortex
    if hostname == "cerebralcortex":
        cc = CerebralCortex(
            '/home/vagrant/CerebralCortex-DockerCompose/cc_config_file/cc_vagrant_configuration.yml'
        )
    elif '10dot' in hostname or 'memphis' in hostname:
        cc = CerebralCortex(
            '/cerebralcortex/code/config/cc_starwars_configuration.yml')
    else:
        print("unknown environment!")
        return None
    return cc
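# Example usage (a sketch; obtaining the hostname via socket.gethostname() is
# an assumption, not something get_cc prescribes):
#
#     import socket
#     cc = get_cc(socket.gethostname())
#     if cc is None:
#         raise SystemExit("no CerebralCortex configuration for this host")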
def __init__(self, config):
    self.CC = CerebralCortex(config)
    self.config = self.CC.config
    self.sqlData = SqlData(self.config,
                           dbName="environmental_data_collection")
    self.process()
def mobile_app_availability_marker(all_streams, stream_name: str, owner_id,
                                   CC: CerebralCortex, config: dict):
    """
    This algorithm uses phone battery percentages to decide whether the mobile
    app was available or unavailable. Theoretically, phone battery data shall
    be collected 24/7.
    :param all_streams:
    :param stream_name:
    :param owner_id:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    if config["stream_names"]["phone_battery"] in all_streams:
        raw_stream_ids = all_streams[config["stream_names"]["phone_battery"]]["stream_ids"]
        dd_stream_name = config["stream_names"]["app_availability_marker"]

        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the app-availability marker
        app_availability_marker_stream_id = generate_dd_stream_uuid(
            dd_stream_name, marker_version, owner_id,
            "MOBILE APP AVAILABILITY MARKER")

        input_streams = [{
            "owner_id": owner_id,
            "id": raw_stream_ids,
            "name": stream_name
        }]
        output_stream = {
            "id": app_availability_marker_stream_id,
            "name": dd_stream_name,
            "algo_type": config["algo_type"]["app_availability_marker"]
        }
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(
                    raw_stream_id, app_availability_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        stream = CC.get_stream(raw_stream_id, day=day,
                                               data_type=DataSet.COMPLETE)
                        if len(stream.data) > 0:
                            windowed_data = window(
                                stream.data,
                                config['general']['window_size'], True)
                            results = process_windows(windowed_data, config)
                            merged_windows = merge_consective_windows(results)
                            if len(merged_windows) > 0:
                                store(merged_windows, input_streams,
                                      output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, "
                            "stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id),
                               str(stream_name), str(day), str(e)))
def sensor_availability(all_streams, wrist: str, owner_id: uuid,
                        CC: CerebralCortex, config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away
    from phone or sensor
    :param all_streams:
    :param wrist:
    :param owner_id:
    :param CC:
    :param config:
    """
    marker_version = "0.0.1"

    if config["stream_names"]["phone_physical_activity"] in all_streams:
        phone_physical_activity = all_streams[config["stream_names"]["phone_physical_activity"]]["stream_ids"]
    else:
        phone_physical_activity = None

    key0 = "motionsense_hrv_accel_" + wrist
    key1 = "motionsense_hrv_" + wrist + "_wireless_marker"

    raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
    stream_name = all_streams[config["stream_names"][key0]]["name"]
    dd_stream_name = config["stream_names"][key1]

    if config["stream_names"]["phone_physical_activity"] in all_streams:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the wireless-disconnection marker
        wireless_marker_stream_id = generate_dd_stream_uuid(
            dd_stream_name, marker_version, owner_id,
            "WIRELESS DISCONNECTION MARKER")

        input_streams = [{"owner_id": owner_id, "id": raw_stream_ids,
                          "name": stream_name}]
        output_stream = {"id": wireless_marker_stream_id,
                         "name": dd_stream_name,
                         "algo_type": config["algo_type"]["sensor_unavailable_marker"]}
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(
                    raw_stream_id, wireless_marker_stream_id, CC)
                for day in stream_days:
                    try:
                        # load stream data to be diagnosed
                        raw_stream = CC.get_stream(raw_stream_id, day=day,
                                                   data_type=DataSet.COMPLETE)
                        if len(raw_stream.data) > 0:
                            windowed_data = window(
                                raw_stream.data,
                                config['general']['window_size'], True)
                            results = process_windows(
                                windowed_data, day, CC,
                                phone_physical_activity, config)
                            merged_windows = merge_consective_windows(results)
                            if len(merged_windows) > 0:
                                store(merged_windows, input_streams,
                                      output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, "
                            "stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id),
                               str(stream_name), str(day), str(e)))
def generate_report(user_id: uuid, username: str, cc_config_file,
                    config: dict):
    """
    Contains pipeline execution of all the reports
    :param user_id:
    :param username:
    :param cc_config_file:
    :param config:
    """
    CC = CerebralCortex(cc_config_file)

    # get all the streams belonging to a participant
    streams = CC.get_user_streams(user_id)
    if streams and len(streams) > 0:
        # Data Yield
        if config["input_stream"]["motionsense_hrv_led_quality_left"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["motionsense_hrv_led_quality_left"]]["identifier"],
                username, "motionsense_left_led", CC, config)

        if config["input_stream"]["motionsense_hrv_led_quality_right"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["motionsense_hrv_led_quality_right"]]["identifier"],
                username, "motionsense_right_led", CC, config)

        if config["input_stream"]["motionsense_hrv_accel_quality_left"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["motionsense_hrv_accel_quality_left"]]["identifier"],
                username, "motionsense_left_accel", CC, config)

        if config["input_stream"]["motionsense_hrv_accel_quality_right"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["motionsense_hrv_accel_quality_right"]]["identifier"],
                username, "motionsense_right_accel", CC, config)

        if config["input_stream"]["autosense_ble_accel_quality"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["autosense_ble_accel_quality"]]["identifier"],
                username, "autosense_ble_accel", CC, config)

        if config["input_stream"]["autosense_ble_respiration_quality"] in streams:
            compute_data_yield(
                streams[config["input_stream"]["autosense_ble_respiration_quality"]]["identifier"],
                username, "autosense_ble_respiration", CC, config)
def get_datastream(CC: CerebralCortex, identifier: str, day: str,
                   user_id: str, localtime: bool) -> List[DataPoint]:
    """
    Loads and concatenates the data of every stream id registered under the
    given stream name (identifier) for one user and day.
    """
    stream_ids = CC.get_stream_id(user_id, identifier)
    data = []
    for stream_id in stream_ids:
        temp_data = CC.get_stream(stream_id=stream_id['identifier'],
                                  user_id=user_id, day=day,
                                  localtime=localtime)
        if len(temp_data.data) > 0:
            data.extend(temp_data.data)
    return data
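# Example call (a sketch; the stream name is hypothetical, and the day string
# follows the YearMonthDay convention used elsewhere in this code):
#
#     points = get_datastream(CC, "<stream_name>", "20171206", user_id,
#                             localtime=False)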
def filter_battery_off_windows(stream_id: uuid, stream_name: str,
                               main_stream_windows: dict, owner_id: uuid,
                               config: dict, CC_obj: CerebralCortex) -> dict:
    """
    Drops windows that fall inside phone or sensor battery-off periods.
    :param stream_id:
    :param stream_name:
    :param main_stream_windows:
    :param owner_id:
    :param config:
    :param CC_obj:
    :return:
    """
    start_time = ""
    end_time = ""

    # load phone battery data
    phone_battery_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(stream_id + config["stream_names"]["phone_battery"] + owner_id))
    phone_battery_marker_stream = CC_obj.get_datastream(
        phone_battery_marker_stream_id, data_type=DataSet.ONLY_DATA,
        start_time=start_time, end_time=end_time)

    # load sensor battery data (note: every branch currently loads the same
    # phone-battery marker stream id)
    sensor_battery_marker_stream = {}
    if stream_name in (config["stream_names"]["autosense_ecg"],
                       config["stream_names"]["autosense_rip"],
                       config["stream_names"]["motionsense_hrv_accel_right"],
                       config["stream_names"]["motionsense_hrv_accel_left"]):
        sensor_battery_marker_stream = CC_obj.get_datastream(
            phone_battery_marker_stream_id, data_type=DataSet.ONLY_DATA,
            start_time=start_time, end_time=end_time)

    results = {}
    for key, data in main_stream_windows.items():
        battery_marker = 0  # reset the marker for each window
        for phone_key, phone_data in phone_battery_marker_stream.items():
            if phone_key.start_time <= key.start_time and phone_key.end_time >= key.end_time:
                battery_marker = 1
        for sensor_key, sensor_data in sensor_battery_marker_stream.items():
            if sensor_key.start_time <= key.start_time and sensor_key.end_time >= key.end_time:
                battery_marker = 1

        if battery_marker != 1:
            results[key] = data
    return results
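# A window survives the filter only if no interval in either battery-marker
# stream fully covers it; the window keys are assumed to carry start_time and
# end_time attributes, as the comparisons above require.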
def mobile_app_availability_marker(raw_stream_id: uuid, stream_name: str,
                                   owner_id, dd_stream_name,
                                   CC: CerebralCortex, config: dict):
    """
    This algorithm uses phone battery percentages to decide whether the mobile
    app was available or unavailable. Theoretically, phone battery data shall
    be collected 24/7.
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the app-availability marker
        app_availability_marker_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS,
            str(raw_stream_id + dd_stream_name + owner_id +
                "MOBILE APP AVAILABILITY MARKER"))

        stream_days = get_stream_days(raw_stream_id,
                                      app_availability_marker_stream_id, CC)

        for day in stream_days:
            stream = CC.get_stream(raw_stream_id, day=day,
                                   data_type=DataSet.COMPLETE)
            if len(stream.data) > 0:
                windowed_data = window(stream.data,
                                       config['general']['window_size'], True)
                results = process_windows(windowed_data, config)
                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{"owner_id": owner_id,
                                      "id": str(raw_stream_id),
                                      "name": stream_name}]
                    output_stream = {
                        "id": app_availability_marker_stream_id,
                        "name": dd_stream_name,
                        "algo_type": config["algo_type"]["app_availability_marker"]
                    }
                    metadata = get_metadata(dd_stream_name, input_streams,
                                            config)
                    store(merged_windows, input_streams, output_stream,
                          metadata, CC, config)
    except Exception as e:
        print(e)
def compute_data_yield(stream_id: uuid, username: str,
                       report_stream_name: str, CC: CerebralCortex,
                       config: dict):
    """
    Uses the LED quality stream to calculate the total good quality data for
    each day. The LED quality stream carries a data-quality value per
    3-second window.
    """
    data_dir = config["output"]["folder_path"] + "/" + \
        config["reports"]["data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + report_stream_name + ".csv"

    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
        os.mknod(data_yield_report)

    stream_days = get_stream_days(stream_id, CC)
    with open(data_yield_report, "w") as report:
        report.write(report_stream_name + ",,,,\n")
        report.write("day, good hours, total_hours,, \n")
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id, day=day,
                                       data_type=DataSet.ONLY_DATA)
            if len(raw_stream) > 0:
                results = process_stream(raw_stream)
                results = str(day) + "," + results
                report.write(results)
def get_streams(stream_id: uuid, username: str, wrist: str,
                CC: CerebralCortex, config: dict):
    """
    Uses the LED quality stream to calculate the total good quality data for
    each day. The LED quality stream carries a data-quality value per
    3-second window.
    """
    data_dir = config["output"]["folder_path"] + "/" + \
        config["reports"]["data_yield_per_day"] + "/"
    data_yield_report = data_dir + username + "_" + wrist + ".csv"

    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
        os.mknod(data_yield_report)

    stream_days = get_stream_days(stream_id, CC)
    with open(data_yield_report, "w") as report:
        report.write(
            "day, good, noise, bad, band_off, missing, not_worn, band_loose \n"
        )
        for day in stream_days:
            # load stream data
            raw_stream = CC.get_stream(stream_id, day=day,
                                       data_type=DataSet.COMPLETE)
            if len(raw_stream.data) > 0:
                results = process_stream(raw_stream.data)
                results = str(day) + "," + results
                report.write(results)
def get_stream_days(raw_stream_id: uuid, dd_stream_id: uuid,
                    CC: CerebralCortex) -> List:
    """
    Returns a list of days (string format: YearMonthDay, e.g., 20171206)
    :param raw_stream_id:
    :param dd_stream_id:
    """
    dd_stream_end_time = CC.get_stream_duration(dd_stream_id)["end_time"]
    if not dd_stream_end_time:
        stream_duration = CC.get_stream_duration(raw_stream_id)
        stream_days = []
        days = stream_duration["end_time"] - stream_duration["start_time"]
        for day in range(days.days + 1):
            stream_days.append(
                (stream_duration["start_time"] +
                 timedelta(days=day)).strftime('%Y%m%d'))
    else:
        stream_days = [(dd_stream_end_time +
                        timedelta(days=1)).strftime('%Y%m%d')]
    return stream_days
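# Behavior sketch based on the code above: if the diagnostic stream has no
# end_time yet, every day of the raw stream is returned; otherwise only the
# day after the diagnostic stream's end_time is returned, e.g. an end_time of
# 2017-12-06 yields ['20171207'].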
def phone_screen_touch_marker(raw_stream_id: uuid, raw_stream_name: str,
                              owner_id, dd_stream_name, CC: CerebralCortex,
                              config: dict, start_time=None, end_time=None):
    """
    This is not part of the core data diagnostic suite. It only counts how
    many screen touches occurred.
    :param raw_stream_id:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the screen-touch marker
        screen_touch_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS,
            str(raw_stream_id + dd_stream_name + owner_id +
                "mobile phone screen touch marker"))

        stream_days = get_stream_days(raw_stream_id, screen_touch_stream_id,
                                      CC)
        for day in stream_days:
            stream = CC.get_datastream(raw_stream_id,
                                       data_type=DataSet.COMPLETE, day=day,
                                       start_time=start_time,
                                       end_time=end_time)
            if len(stream.data) > 0:
                windowed_data = window(stream.data,
                                       config['general']['window_size'], True)
                results = process_windows(windowed_data)
                merged_windows = merge_consective_windows(results)
                if len(merged_windows) > 0:
                    input_streams = [{
                        "owner_id": owner_id,
                        "id": str(raw_stream_id),
                        "name": raw_stream_name
                    }]
                    output_stream = {
                        "id": screen_touch_stream_id,
                        "name": dd_stream_name,
                        "algo_type": config["algo_type"]["app_availability_marker"]
                    }
                    metadata = get_metadata(dd_stream_name, input_streams,
                                            config)
                    store(merged_windows, input_streams, output_stream,
                          metadata, CC, config)
    except Exception as e:
        print(e)
def get_stream_days(stream_id: uuid, CC: CerebralCortex) -> List:
    """
    Returns a list of days (string format: YearMonthDay, e.g., 20171206)
    :param stream_id:
    """
    stream_dicts = CC.get_stream_duration(stream_id)
    stream_days = []
    days = stream_dicts["end_time"] - stream_dicts["start_time"]
    for day in range(days.days + 1):
        stream_days.append(
            (stream_dicts["start_time"] +
             timedelta(days=day)).strftime('%Y%m%d'))
    return stream_days
def battery_marker(raw_stream_id: uuid, stream_name: str, user_id,
                   dd_stream_name, CC: CerebralCortex, config: dict):
    """
    This algorithm uses battery percentages to decide whether the device was
    powered off or the battery was low. All the labeled data (st, et, label)
    with its metadata are then stored in a datastore.
    :param raw_stream_id:
    :param CC:
    :param config:
    """
    try:
        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the battery-marker
        battery_marker_stream_id = uuid.uuid3(
            uuid.NAMESPACE_DNS,
            str(raw_stream_id + dd_stream_name + user_id + "BATTERY MARKER"))

        stream_days = get_stream_days(raw_stream_id, battery_marker_stream_id,
                                      CC)
        for day in stream_days:
            stream = CC.get_stream(raw_stream_id, day=day,
                                   data_type=DataSet.COMPLETE)

            if len(stream.data) > 0:
                windowed_data = window(stream.data,
                                       config['general']['window_size'], True)
                results = process_windows(windowed_data, stream_name, config)
                merged_windows = merge_consective_windows(results)

                if len(merged_windows) > 0:
                    input_streams = [{
                        "owner_id": user_id,
                        "id": str(raw_stream_id),
                        "name": stream_name
                    }]
                    output_stream = {
                        "id": battery_marker_stream_id,
                        "name": dd_stream_name,
                        "algo_type": config["algo_type"]["battery_marker"]
                    }
                    labelled_windows = mark_windows(battery_marker_stream_id,
                                                    merged_windows, CC, config)
                    metadata = get_metadata(dd_stream_name, input_streams,
                                            config)
                    store(labelled_windows, input_streams, output_stream,
                          metadata, CC, config)
    except Exception as e:
        print(e)
def sensor_availability(raw_stream_id: uuid, stream_name: str,
                        owner_id: uuid, dd_stream_name,
                        phone_physical_activity, CC: CerebralCortex,
                        config: dict):
    """
    Mark missing data as wireless disconnection if a participant walks away
    from phone or sensor
    :param raw_stream_id:
    :param stream_name:
    :param owner_id:
    :param dd_stream_name:
    :param phone_physical_activity:
    :param CC:
    :param config:
    """
    # using stream_id, data-diagnostic-stream-id, and owner id to generate a
    # unique stream ID for the wireless-disconnection marker
    wireless_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS, str(raw_stream_id + dd_stream_name + owner_id))

    stream_days = get_stream_days(raw_stream_id, wireless_marker_stream_id,
                                  CC)
    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id, day=day,
                                   data_type=DataSet.COMPLETE)

        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data,
                                   config['general']['window_size'], True)
            results = process_windows(windowed_data, day, CC,
                                      phone_physical_activity, config)
            merged_windows = merge_consective_windows(results)
            if len(merged_windows) > 0:
                input_streams = [{
                    "owner_id": owner_id,
                    "id": str(raw_stream_id),
                    "name": stream_name
                }]
                output_stream = {
                    "id": wireless_marker_stream_id,
                    "name": dd_stream_name,
                    "algo_type": config["algo_type"]["sensor_unavailable_marker"]
                }
                metadata = get_metadata(dd_stream_name, input_streams, config)
                store(merged_windows, input_streams, output_stream, metadata,
                      CC, config)
def diagnose_streams(owner_id: uuid, CC: CerebralCortex, config: dict):
    """
    Contains pipeline execution of all the diagnosis algorithms
    :param owner_id:
    :param CC:
    :param config:
    """
    # get all the streams belonging to a participant
    streams = CC.get_user_streams(owner_id)
    if streams and len(streams) > 0:
        # phone battery
        battery_marker(streams, owner_id,
                       config["stream_names"]["phone_battery"], CC, config)

        # autosense battery
        battery_marker(streams, owner_id,
                       config["stream_names"]["autosense_battery"], CC,
                       config)

        # TODO: Motionsense battery values are not available.
        # TODO: Uncomment following code when the motionsense battery values are available
        # battery_marker(streams, owner_id, config["stream_names"]["motionsense_hrv_battery_right"], CC, config)
        # battery_marker(streams, owner_id, config["stream_names"]["motionsense_hrv_battery_left"], CC, config)

        # mobile phone availability marker
        mobile_app_availability_marker(
            streams, streams[config["stream_names"]["phone_battery"]]["name"],
            owner_id, CC, config)

        # Sensor failure
        sensor_failure_marker(streams, "right", owner_id, CC, config)
        sensor_failure_marker(streams, "left", owner_id, CC, config)

        # Motionsense (ms) wireless disconnection (wd)
        ms_wd(streams, "right", owner_id, CC, config)
        ms_wd(streams, "left", owner_id, CC, config)

        # Attachment marker
        ms_attachment_marker(streams, "right", owner_id, CC, config)
        ms_attachment_marker(streams, "left", owner_id, CC, config)

        # Packet-loss marker
        packet_loss_marker(streams, "right", "accel", owner_id, CC, config)
        packet_loss_marker(streams, "left", "accel", owner_id, CC, config)
        packet_loss_marker(streams, "right", "gyro", owner_id, CC, config)
        packet_loss_marker(streams, "left", "gyro", owner_id, CC, config)
def process_feature_on_user(user, module_name, all_days, cc_config_path):
    try:
        cc = CerebralCortex(cc_config_path)
        module = importlib.import_module(module_name)
        feature_class_name = getattr(module, 'feature_class_name')
        feature_class = getattr(module, feature_class_name)
        feature_class_instance = feature_class(cc)
        if gps_key is not None:
            feature_class_instance.gps_api_key = gps_key
        f = feature_class_instance.process
        f(user, all_days)
    except Exception as e:
        err = str(e) + "\n" + str(traceback.format_exc())
        print(err)
        syslog.openlog(ident="CerebralCortex-Driver")
        syslog.syslog(LOG_ERR, err)
        syslog.closelog()
def attachment_marker(raw_stream_id: uuid, stream_name: str, owner_id: uuid,
                      dd_stream_name, CC: CerebralCortex, config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or
    improper-attachment. All the labeled data (st, et, label) with its
    metadata are then stored in a datastore
    """
    # TODO: quality streams could be multiple so find the one computed with CC
    # using stream_id, data-diagnostic-stream-id, and owner id to generate a
    # unique stream ID for the attachment marker
    attachment_marker_stream_id = uuid.uuid3(
        uuid.NAMESPACE_DNS,
        str(raw_stream_id + dd_stream_name + owner_id + "ATTACHMENT MARKER"))

    stream_days = get_stream_days(raw_stream_id, attachment_marker_stream_id,
                                  CC)
    for day in stream_days:
        # load stream data to be diagnosed
        raw_stream = CC.get_stream(raw_stream_id, day=day,
                                   data_type=DataSet.COMPLETE)

        if len(raw_stream.data) > 0:
            windowed_data = window(raw_stream.data,
                                   config['general']['window_size'], True)
            results = process_windows(windowed_data, config)
            merged_windows = merge_consective_windows(results)

            input_streams = [{
                "owner_id": owner_id,
                "id": str(raw_stream_id),
                "name": stream_name
            }]
            output_stream = {
                "id": attachment_marker_stream_id,
                "name": dd_stream_name,
                "algo_type": config["algo_type"]["attachment_marker"]
            }
            metadata = get_metadata(dd_stream_name, input_streams, config)
            store(merged_windows, input_streams, output_stream, metadata, CC,
                  config)
"feature data" , required=True) parser.add_argument("-m", "--metadata_file", help="Path to the file containing " " the metadata information", required=True) args = vars(parser.parse_args()) metadata_map = {} stream_names = {} if args['cc_config']: CC_CONFIG_PATH = args['cc_config'] if args['data_dir']: DATA_DIR = args['data_dir'] if args['metadata_file']: METADATA = args['metadata_file'] CC = CerebralCortex(CC_CONFIG_PATH) def load_metadata(metadata_dir): ''' This method reads all the metadata files in the given directory and loads them with key as the stream name in the metadata_map dict. ''' metadata_files = [os.path.join(metadata_dir,f) for f in os.listdir(metadata_dir) if os.path.isfile(os.path.join(metadata_dir,f))] for mf in metadata_files: mfp = open(mf,'r') metadata_json = json.loads(mfp.read()) metadata_map[metadata_json['name']] = metadata_json def load_streamnames():
    if data is not None and len(data) > 0:
        if data[0].offset is None or data[0].offset == "":
            return filename
        # else:
        #     return "Data is empty."


if __name__ == '__main__':
    # create and load CerebralCortex object and configs
    parser = argparse.ArgumentParser(
        description='CerebralCortex-Script to verify stream-format.')
    parser.add_argument("-conf", "--conf", help="Configuration file path",
                        required=True)
    parser.add_argument("-study_name", "--study_name", help="Study name",
                        required=True)
    parser.add_argument(
        "-uid", "--uid",
        help="User ID, only if verification needs to be performed on a "
             "single participant",
        required=False)
    args = vars(parser.parse_args())

    CC = CerebralCortex(args["conf"])
    VerifyStreamFormat(args["study_name"], CC, args["uid"])
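# Example invocation (the script filename is hypothetical; the flags come from
# the argparse definition above):
#
#     python verify_stream_format.py -conf cc_config.yml -study_name mperf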
def diagnose_streams(user_id: uuid, CC: CerebralCortex, config: dict):
    """
    Contains pipeline execution of all the diagnosis algorithms
    :param user_id:
    :param CC:
    :param config:
    """
    # get all the streams belonging to a participant
    streams = CC.get_user_streams(user_id)
    if streams and len(streams) > 0:
        # phone battery
        if config["stream_names"]["phone_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                user_id, config["stream_names"]["phone_battery_marker"], CC,
                config)

            # mobile phone availability marker
            mobile_app_availability_marker(
                streams[config["stream_names"]["phone_battery"]]["identifier"],
                streams[config["stream_names"]["phone_battery"]]["name"],
                user_id, config["stream_names"]["app_availability_marker"],
                CC, config)

        # autosense battery
        if config["stream_names"]["autosense_battery"] in streams:
            battery_marker(
                streams[config["stream_names"]["autosense_battery"]]["identifier"],
                streams[config["stream_names"]["autosense_battery"]]["name"],
                user_id, config["stream_names"]["autosense_battery_marker"],
                CC, config)

        # TODO: Motionsense battery values are not available.
        # TODO: Uncomment following code when the motionsense battery values are available and/or not correct
        # if config["stream_names"]["motionsense_hrv_battery_right"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_right"]]["identifier"], streams[config["stream_names"]["motionsense_hrv_battery_right"]]["name"], participant_id, config["stream_names"]["motionsense_hrv_battery_right_marker"], CC, config)
        # if config["stream_names"]["motionsense_hrv_battery_left"] in streams:
        #     battery_marker(streams[config["stream_names"]["motionsense_hrv_battery_left"]]["identifier"], streams[config["stream_names"]["motionsense_hrv_battery_left"]]["name"], participant_id, config["stream_names"]["motionsense_hrv_battery_left_marker"], CC, config)

        ### Sensor unavailable - wireless disconnection
        if config["stream_names"]["phone_physical_activity"] in streams:
            phone_physical_activity = streams[config["stream_names"]["phone_physical_activity"]]["identifier"]
        else:
            phone_physical_activity = None

        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            if config["stream_names"]["motionsense_hrv_gyro_right"] in streams:
                sensor_failure_marker(
                    streams[config["stream_names"]["motionsense_hrv_right_attachment_marker"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["identifier"],
                    "right", user_id,
                    config["stream_names"]["motionsense_hrv_right_sensor_failure_marker"],
                    CC, config)

            ms_wd(
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_right_wireless_marker"],
                phone_physical_activity, CC, config)

        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            if config["stream_names"]["motionsense_hrv_gyro_left"] in streams:
                sensor_failure_marker(
                    streams[config["stream_names"]["motionsense_hrv_left_attachment_marker"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                    streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["identifier"],
                    "left", user_id,
                    config["stream_names"]["motionsense_hrv_left_sensor_failure_marker"],
                    CC, config)

            ms_wd(
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_left_wireless_marker"],
                phone_physical_activity, CC, config)

        ### Attachment marker
        if config["stream_names"]["motionsense_hrv_led_quality_right"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]["motionsense_hrv_led_quality_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_led_quality_right"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_right_attachment_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_led_quality_left"] in streams:
            ms_attachment_marker(
                streams[config["stream_names"]["motionsense_hrv_led_quality_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_led_quality_left"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_left_attachment_marker"],
                CC, config)

        ### Packet-loss marker
        if config["stream_names"]["motionsense_hrv_accel_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_right"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_accel_right_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_accel_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_accel_left"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_accel_left_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_gyro_right"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_gyro_right"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_gyro_right_packetloss_marker"],
                CC, config)

        if config["stream_names"]["motionsense_hrv_gyro_left"] in streams:
            packet_loss_marker(
                streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["identifier"],
                streams[config["stream_names"]["motionsense_hrv_gyro_left"]]["name"],
                user_id,
                config["stream_names"]["motionsense_hrv_gyro_left_packetloss_marker"],
                CC, config)

        if config["stream_names"]["phone_screen_touch"] in streams:
            phone_screen_touch_marker(
                streams[config["stream_names"]["phone_screen_touch"]]["identifier"],
                streams[config["stream_names"]["phone_screen_touch"]]["name"],
                user_id, config["stream_names"]["phone_screen_touch_marker"],
                CC, config)
        CC, config)


if __name__ == '__main__':
    # create and load CerebralCortex object and configs
    parser = argparse.ArgumentParser(
        description='CerebralCortex Kafka Message Handler.')
    parser.add_argument("-cc", "--cc_config_filepath",
                        help="Configuration file path", required=True)
    parser.add_argument("-mdc", "--mdebugger_config_filepath",
                        help="mDebugger configuration file path",
                        required=True)
    args = vars(parser.parse_args())

    CC = CerebralCortex(args["cc_config_filepath"])

    # load data diagnostic configs
    md_config = Configuration(args["mdebugger_config_filepath"]).config

    # get/create spark context
    spark_context = get_or_create_sc(type="sparkContext")

    # run for one participant
    # DiagnoseData().one_user_data(["cd7c2cd6-d0a3-4680-9ba2-0c59d0d0c684"], md_config, CC, spark_context)

    # run for all the participants in a study
    all_users_data("mperf", md_config, CC, spark_context)
def write_data_file(file, streams, user, s):
    cc = CerebralCortex(
        "/cerebralcortex/code/config/cc_starwars_configuration.yml")
    if os.path.isfile(file + '.gz'):
        print("Already Processed %s" % file + '.gz')
        return True

    with open(file + '_temp', 'wt') as output_file:
        for stream_id in streams[s]['stream_ids']:
            logger.info('Processing %s' % streams[s]['name'])
            print('Processing %s' % streams[s]['name'])
            days = get_stream_days(cc, stream_id, streams[s])
            for day in days:
                st = datetime.datetime.now()
                print("XXXXXXXXXX", streams[s]['name'], user['identifier'],
                      stream_id, day)
                datastream = cc.get_stream(stream_id, user['identifier'], day,
                                           localtime=False)
                et = datetime.datetime.now()

                if len(datastream.data) > 0:
                    if len(datastream.data) > 100000:
                        logger.info('%s %s %d %s' %
                                    (streams[s]['name'], day,
                                     len(datastream.data), str(et - st)))
                        print('%s %s %d %s' %
                              (streams[s]['name'], day,
                               len(datastream.data), str(et - st)))
                    try:
                        for d in datastream.data:
                            output_string = str(
                                int(d.start_time.timestamp() * 1e6))

                            if type(d.end_time) is datetime.datetime:
                                output_string += ',' + str(
                                    int(d.end_time.timestamp() * 1e6))
                            else:
                                output_string += ',-1'

                            output_string += ',' + str(int(d.offset))

                            if type(d.sample) is list:
                                output_string += ',' + ','.join(
                                    map(str, d.sample))
                            else:
                                output_string += ',' + str(d.sample)

                            output_file.write(output_string + '\n')
                    except Exception as e:
                        logger.error("Stream %s has had a parsing error" %
                                     streams[s]['name'])
                        print("Stream %s has had a parsing error" %
                              streams[s]['name'])
                        logger.error(str(e))
                        print(str(e))

    os.system('sort ' + file + '_temp | gzip > ' + file + '.gz')
    os.system('rm ' + file + '_temp')
    return True
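# Each exported row is: start_time in microseconds, end_time in microseconds
# (-1 when absent), the data point's offset, then the sample value(s),
# comma-separated. For example (values hypothetical):
#
#     1512518400000000,-1,-21600000,0.98,0.02,-0.12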
parser.add_argument('--study',
                    help="study name as it appears in MySQL user metadata",
                    required=True)
parser.add_argument('--output',
                    help="Output directory for the exported files",
                    required=True)
parser.add_argument('--participant', help="Participant username")
parser.add_argument('-n', '--num_jobs',
                    help="Number of concurrent exports to run", type=int,
                    default=1)

args = parser.parse_args()

CC = CerebralCortex(
    "/cerebralcortex/code/config/cc_starwars_configuration.yml")
output_dir = args.output
study_name = args.study

users = CC.get_all_users(study_name=study_name)


def get_stream_days(cc, identifier, stream):
    duration = cc.get_stream_duration(stream['identifier'])
    day = duration['start_time']
    result = []
    while day < (duration['end_time'] + datetime.timedelta(days=1)):
def attachment_marker(all_streams, wrist, owner_id: uuid, CC: CerebralCortex,
                      config: dict):
    """
    Label sensor data as sensor-on-body, sensor-off-body, or
    improper-attachment. All the labeled data (st, et, label) with its
    metadata are then stored in a datastore
    """
    marker_version = "0.0.1"

    # TODO: quality streams could be multiple so find the one computed with CC
    key0 = "motionsense_hrv_led_quality_" + wrist
    key1 = "motionsense_hrv_" + wrist + "_attachment_marker"

    if config["stream_names"][key0] in all_streams:
        raw_stream_ids = all_streams[config["stream_names"][key0]]["stream_ids"]
        stream_name = all_streams[config["stream_names"][key0]]["name"]
        dd_stream_name = config["stream_names"][key1]

        # using stream_id, data-diagnostic-stream-id, and owner id to generate
        # a unique stream ID for the attachment marker
        attachment_marker_stream_id = generate_dd_stream_uuid(
            dd_stream_name, marker_version, owner_id, "ATTACHMENT MARKER")

        input_streams = [{
            "owner_id": owner_id,
            "id": raw_stream_ids,
            "name": stream_name
        }]
        output_stream = {
            "id": attachment_marker_stream_id,
            "name": dd_stream_name,
            "algo_type": config["algo_type"]["attachment_marker"]
        }
        metadata = get_metadata(dd_stream_name, input_streams, config)

        if isinstance(raw_stream_ids, list):
            for raw_stream_id in raw_stream_ids:
                stream_days = CC.get_stream_days(raw_stream_id,
                                                 attachment_marker_stream_id,
                                                 CC)
                for day in stream_days:
                    try:
                        # load stream data to be diagnosed
                        raw_stream = CC.get_stream(raw_stream_id, day=day,
                                                   data_type=DataSet.COMPLETE)
                        if len(raw_stream.data) > 0:
                            windowed_data = window(
                                raw_stream.data,
                                config['general']['window_size'], True)
                            results = process_windows(windowed_data, config)
                            merged_windows = merge_consective_windows(results)
                            store(merged_windows, input_streams,
                                  output_stream, metadata, CC, config)
                    except Exception as e:
                        CC.logging.log(
                            "Error processing: owner-id: %s, stream-id: %s, "
                            "stream-name: %s, day: %s. Error: %s"
                            % (str(owner_id), str(raw_stream_id),
                               str(stream_name), str(day), str(e)))
def main():
    global cc_config_path
    global metadata_dir
    global gps_key

    # Get the list of the features to process
    parser = argparse.ArgumentParser(description='CerebralCortex '
                                     'Feature Processing Driver')
    parser.add_argument("-f", "--feature-list",
                        help="List of feature names separated by commas",
                        nargs='?', default=None, required=False)
    parser.add_argument("-c", "--cc-config",
                        help="Path to file containing the "
                             "CerebralCortex configuration",
                        required=True)
    parser.add_argument("-s", "--study-name", help="Study name.",
                        required=True)
    parser.add_argument("-u", "--users", help="Comma separated user uuids",
                        nargs='?', default=None, required=False)
    parser.add_argument("-sd", "--start-date",
                        help="Start date in YYYYMMDD Format", required=True)
    parser.add_argument("-ed", "--end-date",
                        help="End date in YYYYMMDD Format", required=True)
    parser.add_argument("-p", "--num-cores", type=int,
                        help="Set a number greater than 1 to enable spark "
                             "parallel execution",
                        required=False)
    parser.add_argument("-k", "--gps-key", help="GPS API key", required=False)

    args = vars(parser.parse_args())

    feature_list = None
    study_name = None
    users = None
    start_date = None
    end_date = None
    date_format = '%Y%m%d'
    num_cores = 1  # default single threaded

    if args['feature_list']:
        feature_list = args['feature_list'].split(',')
    if args['cc_config']:
        cc_config_path = args['cc_config']
    if args['study_name']:
        study_name = args['study_name']
    if args['users']:
        users = args['users'].split(',')
        print('X' * 100)
        print(len(users))
    if args['start_date']:
        start_date = datetime.strptime(args['start_date'], date_format)
    if args['end_date']:
        end_date = datetime.strptime(args['end_date'], date_format)
    if args['num_cores']:
        num_cores = args['num_cores']
    if args['gps_key']:
        gps_key = args['gps_key']

    all_days = []
    while True:
        all_days.append(start_date.strftime(date_format))
        start_date += timedelta(days=1)
        if start_date > end_date:
            break

    CC = None
    all_users = None
    try:
        CC = CerebralCortex(cc_config_path)
        if not users:
            users = CC.get_all_users(study_name)
            if not users:
                print('No users found')
                return
            if not len(users):
                print('No users found')
                return  # no point continuing
            all_users = [usr['identifier'] for usr in users]
        else:
            all_users = users
    except Exception as e:
        print(str(e))
        print(str(traceback.format_exc()))

    if not all_users:
        print('No users found for the study', study_name)
        return

    found_features = discover_features(feature_list)
    feature_to_process = generate_feature_processing_order(found_features)
    process_features(feature_to_process, all_users, all_days, num_cores)
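# Example invocation (the module filename is hypothetical; the flags come from
# the argparse definition above):
#
#     python driver.py -c cc_config.yml -s mperf -sd 20171001 -ed 20171007 \
#         -f feature_module_a,feature_module_b -p 4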
def run():
    selected_participants = [
        "622bf725-2471-4392-8f82-fcc9115a3745",
        "d3d33d63-101d-44fd-b6b9-4616a803225d",
        "c1f31960-dee7-45ea-ac13-a4fea1c9235c",
        "7b8358f3-c96a-4a17-87ab-9414866e18db",
        "8a3533aa-d6d4-450c-8232-79e4851b6e11",
        "e118d556-2088-4cc2-b49a-82aad5974167",
        "260f551d-e3c1-475e-b242-f17aad20ba2c",
        "dd13f25f-77a0-4a2c-83af-bb187b79a389",
        "17b07883-4959-4037-9b80-dde9a06b80ae",
        "5af23884-b630-496c-b04e-b9db94250307",
        "61519ad0-2aea-4250-9a82-4dcdb93a569c",
        "326a6c55-c963-42c2-bb8a-2591993aaaa2",
        "a54d9ef4-a46a-418b-b6cc-f10b49a946ac",
        "2fb5e890-afaf-428a-8e28-a7c70bf8bdf1",
        "c93a811e-1f47-43b6-aef9-c09338e43947",
        "9e4aeae9-8729-4b0f-9e84-5c1f4eeacc74",
        "479eea59-8ad8-46aa-9456-29ab1b8f2cb2",
        "b4ff7130-3055-4ed1-a878-8dfaca7191ac",
        "fbd7bc95-9f42-4c2c-94f4-27fd78a7273c",
        "bbc41a1e-4bbe-4417-a40c-64635cc552e6",
        "82a921b9-361a-4fd5-8db7-98961fdbf25a",
        "66a5cdf8-3b0d-4d85-bdcc-68ae69205206",
        "d4691f19-57be-44c4-afc2-5b5f82ec27b5",
        "136f8891-af6f-49c1-a69a-b4acd7116a3c"
    ]
    parser = argparse.ArgumentParser(
        description='CerebralCortex Kafka Message Handler.')
    parser.add_argument("-c", "--config_filepath",
                        help="Configuration file path", required=True)
    # parser.add_argument("-d", "--data_dir",
    #                     help="Directory path where all the gz files are stored by API-Server",
    #                     required=True)
    parser.add_argument(
        "-bd", "--batch_duration",
        help="How frequently kafka messages shall be checked (duration in seconds)",
        default="5", required=False)
    parser.add_argument(
        "-mbs", "--mydb_batch_size",
        help="Total number of messages to fetch from MySQL for processing.",
        default="5000", required=True)
    parser.add_argument(
        "-participants", "--participants",
        help="Whether to run data replay on all participants or a selected few.",
        default="all", required=False)

    args = vars(parser.parse_args())
    participants = args["participants"]
    mydb_batch_size = args["mydb_batch_size"]
    config_filepath = str(args["config_filepath"]).strip()
    batch_duration = int(args["batch_duration"])
    # data_path = str(args["data_dir"]).strip()
    # if (data_path[-1] != '/'):
    #     data_path += '/'

    # Kafka Consumer Configs
    spark_context = get_or_create_sc(type="sparkContext")
    spark_context.setLogLevel("WARN")
    consumer_group_id = "md2k-test"

    CC = CerebralCortex(config_filepath)
    broker = str(CC.config["kafkaserver"]["host"]) + ":" + str(
        CC.config["kafkaserver"]["port"])
    data_replay_using = str(CC.config["data_replay"]["replay_type"])
    data_path = CC.config["data_replay"]["data_dir"]

    if data_replay_using == "mydb":
        for replay_batch in CC.SqlData.get_replay_batch(
                record_limit=mydb_batch_size):
            new_replay_batch = []
            # get records from mysql and process (skip kafka)
            if participants == "all":
                new_replay_batch = replay_batch
            else:
                for rb in replay_batch:
                    if rb["owner_id"] in selected_participants:
                        new_replay_batch.append(rb)
            mysql_batch_to_db(spark_context, new_replay_batch, data_path,
                              config_filepath)
    else:
        ssc = StreamingContext(spark_context, batch_duration)
        kafka_files_stream = spark_kafka_consumer(["filequeue"], ssc, broker,
                                                  consumer_group_id, CC)
        if kafka_files_stream is not None:
            kafka_files_stream.foreachRDD(
                lambda rdd: kafka_file_to_json_producer(
                    rdd, data_path, config_filepath, CC))
        ssc.start()
        ssc.awaitTermination()
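# Example invocation (the script filename is hypothetical; -mbs is required by
# the argparse definition above):
#
#     python data_replay.py -c cc_config.yml -mbs 5000 -participants all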
parser.add_argument('--conf', dest='configuration_file', required=True,
                    help='Cerebral Cortex configuration file')
parser.add_argument('--output', dest='root_dir', required=True,
                    help='Base output directory')
parser.add_argument('--study', dest='study_name', default='mperf',
                    help='Study name')
args = parser.parse_args()

root_dir = os.path.join(args.root_dir)
CC = CerebralCortex(args.configuration_file)


def append_csv(ds):
    with gzip.open(os.path.join(root_dir, ds.owner) + '/' + ds.name + '___' +
                   ds.identifier + '.csv.gz', 'at', compresslevel=1,
                   encoding='utf-8') as f:
        for dp in ds.data:
            if type(dp.sample) is list:
                dp.sample = ','.join(map(str, dp.sample))
            if type(dp.sample) is str and dp.sample[-1] == '\n':
                dp.sample = dp.sample[:-1]