def generate_metadata_dailystats():
    """
    Build the metadata of the 'mcontain-md2k--daily-stats' stream.

    Returns:
        Metadata: stream metadata holding six data descriptors and one module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the day in localtime"),
        ("end_time", "timestamp", "End time of the day in localtime"),
        ("number_of_app_users", "double", "Total number of app users"),
        ("encounter_per_user", "double", "Average encounter per user"),
        ("total_covid_encounters", "double", "Total covid encounters on the day"),
        ("maximum_concurrent_encounters", "double", "Maximum concurrent encounters"),
    )

    metadata = Metadata()
    metadata.set_name('mcontain-md2k--daily-stats').set_description('Daily stats for website')
    for column, column_type, description in columns:
        metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    module = (
        ModuleMetadata()
        .set_name('Daily encounter stats for all the users to be shown in website')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    metadata.add_module(module)
    return metadata
def generate_metadata_hourly():
    """
    Build the metadata of the 'mcontain-md2k--visualization-stats--time-window' stream.

    Returns:
        Metadata: stream metadata holding eight data descriptors and one module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("n_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        # The literal used to be broken by a raw line break inside the quotes,
        # which is not valid in a single-line string; it is joined with one space.
        ("normalized_total_encounters", "double",
         "Total encounters normalized by the centroid area. (encounters per 10 square meter)"),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    stream_metadata.set_name('mcontain-md2k--visualization-stats--time-window').set_description(
        'Computes visualization stats every time window defined by start time and end time')
    for column, column_type, description in columns:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    stream_metadata.add_module(
        ModuleMetadata()
        .set_name('Visualization stats computation in a time window between start time and end time')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**"))
    return stream_metadata
def get_metadata():
    """
    Build the metadata of the decoded MotionsenseHRV stream.

    The stream name is a placeholder string that is expected to be replaced
    by whoever adapts this function.

    Returns:
        Metadata: stream metadata with nine float data descriptors (PPG channels,
        wrist accelerometer axes, wrist gyroscope axes) and one module entry
    """
    stream_name = 'fill in your stream name'

    # (column name, description); every column is registered with type "float".
    float_columns = (
        ("red", "Value of Red LED - PPG"),
        ("infrared", "Value of Infrared LED - PPG"),
        ("green", "Value of Green LED - PPG"),
        ("aclx", "Wrist Accelerometer X-axis"),
        ("acly", "Wrist Accelerometer Y-axis"),
        ("aclz", "Wrist Accelerometer Z-axis"),
        ("gyrox", "Wrist Gyroscope X-axis"),
        ("gyroy", "Wrist Gyroscope Y-axis"),
        ("gyroz", "Wrist Gyroscope Z-axis"),
    )

    stream_metadata = Metadata()
    stream_metadata.set_name(stream_name).set_description(
        "Sequence Aligment, Timestamp Correction and Decoding of MotionsenseHRV")
    for column, description in float_columns:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type("float").set_attribute("description", description))

    # The url scheme was previously misspelled as "hhtps".
    stream_metadata.add_module(
        ModuleMetadata()
        .set_name("cerebralcortex.algorithms.raw_byte_decode.motionsenseHRV.py")
        .set_attribute("url", "https://md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**"))
    return stream_metadata
def generate_metadata_notif():
    """
    Build the metadata of the 'mcontain-md2k--user-notifications' stream.

    Returns:
        Metadata: stream metadata holding six data descriptors and one versioned module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("user", "string", "user id"),
        ("timestamp", "timestamp", "Unix timestamp when the message was generated"),
        ("localtime", "timestamp", "Local timestamp when the message was generated."),
        ("message", "string", "Generated notification message"),
        ("day", "timestamp", "day of the encounter"),
        ("version", "int", "version"),
    )

    metadata = Metadata()
    metadata.set_name('mcontain-md2k--user-notifications').set_description(
        'Notification generated for the Covid-19 encountered users.')
    for column, column_type, description in columns:
        metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    module = (
        ModuleMetadata()
        .set_name('Generated notification for a user encountered with Covid-19 participant')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Shiplu Hawlader", "*****@*****.**")
        .set_version(1)
    )
    metadata.add_module(module)
    return metadata
def generate_metadata_encounter_daily():
    """
    Build the metadata of the 'mcontain-md2k-encounter-daily--bluetooth-gps' stream.

    Returns:
        Metadata: stream metadata holding eight data descriptors and one module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the encounter in localtime"),
        ("end_time", "timestamp", "End time of the encounter in localtime"),
        ("participant_identifier", "string", "Participant with whom encounter happened"),
        ("os", "string", "Operating system of the phone belonging to user"),
        ("latitude", "double", "Latitude of encounter location"),
        ("longitude", "double", "Longitude of encounter location"),
        # NOTE(review): the column is called "durations" but its description
        # talks about distance - confirm which one is intended.
        ("durations", "array", "Mean distance between participants in encounter"),
        ("covid", "integer",
         "0, 1 or 2 indicating if this encounter contained a covid user -- 0 - no covid-19 affected, 1 - user is, 2 - participant identifier is"),
    )

    metadata = Metadata()
    metadata.set_name('mcontain-md2k-encounter-daily--bluetooth-gps').set_description(
        'Contains each unique encounters between two persons along with the location of encounter')
    for column, column_type, description in columns:
        metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    module = (
        ModuleMetadata()
        .set_name('Encounter computation after parsing raw bluetooth-gps data, clustering gps locations and removing double counting')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    metadata.add_module(module)
    return metadata
def gen_phone_battery_metadata(stream_name) -> Metadata:
    """
    Create a Metadata object with sample metadata for a phone battery stream.

    Args:
        stream_name: name assigned to the generated stream metadata

    Returns:
        Metadata: metadata of phone battery stream
    """
    battery_metadata = Metadata()
    battery_metadata.set_name(stream_name).set_version(1).set_description(
        "mobile phone battery sample data stream.")

    level_descriptor = (
        DataDescriptor()
        .set_name("level")
        .set_type("float")
        .set_attribute("description", "current battery charge")
    )
    battery_metadata.add_dataDescriptor(level_descriptor)

    battery_module = (
        ModuleMetadata()
        .set_name("battery")
        .set_version("1.2.4")
        .set_attribute("attribute_key", "attribute_value")
        .set_author("test_user", "test_user@test_email.com")
    )
    battery_metadata.add_module(battery_module)

    # The result of the validity check is not inspected here.
    battery_metadata.is_valid()
    return battery_metadata
def generate_metadata_user_encounter_count():
    """
    Build the metadata of the 'mcontain-md2k--user--encounter-count' stream.

    Returns:
        Metadata: stream metadata holding three data descriptors and one versioned module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window in localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("encounter_count", "int", "Total number of encounter for the user in the given time window"),
    )

    metadata = Metadata()
    metadata.set_name('mcontain-md2k--user--encounter-count').set_description(
        'Number of encounter in a given time window')
    for column, column_type, description in columns:
        metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    module = (
        ModuleMetadata()
        .set_name('Total number of encounter for a user in a given time window')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Shiplu Hawlader, Md Azim Ullah", "[email protected], [email protected]")
        .set_version(1)
    )
    metadata.add_module(module)
    return metadata
def get_metadata():
    """
    Build the metadata of the stress-episode stream.

    ``stream_name`` and ``ecg_stress_probability`` are not parameters; they are
    resolved from the surrounding scope when the function runs.

    Returns:
        Metadata: stream metadata with one input stream, six data descriptors and one module entry
    """
    # Columns registered with a name and type only (no description attribute).
    plain_columns = (
        ("timestamp", "datetime"),
        ("localtime", "datetime"),
        ("version", "int"),
        ("user", "string"),
        ("stress_probability", "float"),
    )

    metadata = Metadata()
    metadata.set_name(stream_name).set_description("Stress episodes computed using MACD formula.")
    metadata.add_input_stream(ecg_stress_probability.metadata.get_name())

    for column, column_type in plain_columns:
        metadata.add_dataDescriptor(DataDescriptor().set_name(column).set_type(column_type))

    episode_descriptor = (
        DataDescriptor()
        .set_name("stress_episode")
        .set_type("string")
        .set_attribute("description", "stress episodes calculated using MACD")
    )
    metadata.add_dataDescriptor(episode_descriptor)

    module = (
        ModuleMetadata()
        .set_name("cerebralcortex.algorithm.stress_prediction.stress_episodes.compute_stress_episodes")
        .set_attribute("url", "http://md2k.org/")
        .set_author("Anandatirtha Nandugudi", "*****@*****.**")
    )
    metadata.add_module(module)
    return metadata
def get_metadata():
    """
    Generate metadata for the ECG RR interval stream.

    ``stream_name`` and ``ecg_data`` are not parameters; they are resolved from
    the surrounding scope when the function runs.

    Returns:
        Metadata: stream metadata with one input stream, the "rr" data descriptor and one module entry
    """
    rr_metadata = Metadata()
    rr_metadata.set_name(stream_name).set_description("ECG RR interval in milliseconds")
    rr_metadata.add_input_stream(ecg_data.metadata.get_name())

    rr_descriptor = (
        DataDescriptor()
        .set_name("rr")
        .set_type("float")
        .set_attribute("description", "rr interval")
    )
    rr_metadata.add_dataDescriptor(rr_descriptor)

    rr_module = (
        ModuleMetadata()
        .set_name("ecg rr interval")
        .set_attribute("url", "http://md2k.org/")
        .set_attribute('algorithm', 'pan-tomkins')
        .set_attribute('unit', 'ms')
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    rr_metadata.add_module(rr_module)
    return rr_metadata
def get_metadata():
    """
    Build the metadata of the ECG stress-likelihood stream.

    ``stream_name`` and ``stress_features_normalized`` are not parameters; they
    are resolved from the surrounding scope when the function runs.

    Returns:
        Metadata: stream metadata with one input stream, two data descriptors
        ("stress_probability" and "window") and one module entry
    """
    likelihood_metadata = Metadata()
    likelihood_metadata.set_name(stream_name).set_description("stress likelihood computed from ECG")
    likelihood_metadata.add_input_stream(stress_features_normalized.metadata.get_name())

    probability_descriptor = (
        DataDescriptor()
        .set_name("stress_probability")
        .set_type("double")
        .set_attribute("description", "stress likelihood computed from ECG only model")
        .set_attribute("threshold", "0.47")
    )
    likelihood_metadata.add_dataDescriptor(probability_descriptor)

    window_descriptor = (
        DataDescriptor()
        .set_name("window")
        .set_type("struct")
        .set_attribute("description", "window start and end time in UTC")
        .set_attribute('start', 'start of 1 minute window')
        .set_attribute('end', 'end of 1 minute window')
    )
    likelihood_metadata.add_dataDescriptor(window_descriptor)

    stress_module = (
        ModuleMetadata()
        .set_name("ECG Stress Model")
        .set_attribute("url", "http://md2k.org/")
        .set_attribute('algorithm', 'cStress')
        .set_attribute('unit', 'ms')
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    likelihood_metadata.add_module(stress_module)
    return likelihood_metadata
def generate_metadata_notification_daily():
    """
    Build the metadata of the 'mcontain-md2k--crowd--notification--daily' stream.

    Returns:
        Metadata: stream metadata holding ten data descriptors and one module entry
    """
    # (column name, column type, description) in the order they are registered.
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("centroid_latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_area", "double", "area of centroid"),
        ("durations", "double", "duration of stay in the centroid in hours"),
        ("unique_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        # The literal used to be broken by a raw line break inside the quotes,
        # which is not valid in a single-line string; it is joined with one space.
        ("normalized_total_encounters", "double",
         "Total encounters normalized by the centroid area. (encounters per 10 square meter)"),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    stream_metadata.set_name('mcontain-md2k--crowd--notification--daily').set_description(
        'Computes notifications for each user who dwelled in a crowded hotspot')
    for column, column_type, description in columns:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type(column_type).set_attribute("description", description))

    stream_metadata.add_module(
        ModuleMetadata()
        .set_name('Notification messages to be shown to each user')
        .set_attribute("url", "https://mcontain.md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**"))
    return stream_metadata
def get_metadata(stress_imputed_data, output_stream_name, input_stream_name):
    """
    Generate metadata for the stress-imputed datastream.

    One data descriptor is added per field of ``stress_imputed_data.schema``,
    using the string form of the field's name and of its data type.

    Args:
        stress_imputed_data (DataStream): datastream whose schema fields describe the output columns
        output_stream_name (str): name assigned to the generated metadata
        input_stream_name (str): name recorded as the input stream of the generated metadata

    Returns:
        Metadata: metadata with one input stream, one data descriptor per schema field and one module entry
    """
    stream_metadata = Metadata()
    stream_metadata.set_name(output_stream_name).set_description("stress imputed") \
        .add_input_stream(input_stream_name)

    for field in stress_imputed_data.schema.fields:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(str(field.name)).set_type(str(field.dataType)))

    # The url scheme was previously misspelled as "hhtps".
    stream_metadata.add_module(
        ModuleMetadata().set_name("stress forward fill imputer")
        .set_attribute("url", "https://md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**"))
    return stream_metadata
def get_metadata():
    """
    Build the metadata of the chest ECG quality stream.

    ``stream_name`` and ``ecg`` are not parameters; they are resolved from the
    surrounding scope when the function runs.

    Returns:
        Metadata: stream metadata with one input stream, six data descriptors and one module entry
    """
    # Columns registered with a name and type only (no attributes).
    plain_columns = (
        ("timestamp", "datetime"),
        ("localtime", "datetime"),
        ("version", "int"),
        ("user", "string"),
    )

    quality_metadata = Metadata()
    quality_metadata.set_name(stream_name).set_description("Chest ECG quality 3 seconds")
    quality_metadata.add_input_stream(ecg.metadata.get_name())

    for column, column_type in plain_columns:
        quality_metadata.add_dataDescriptor(DataDescriptor().set_name(column).set_type(column_type))

    # Besides its description, the quality column carries one attribute per
    # quality label, mapping the label to a short explanation.
    quality_descriptor = (
        DataDescriptor()
        .set_name("quality")
        .set_type("string")
        .set_attribute("description", "ECG data quality")
        .set_attribute('Loose/Improper Attachment', 'Electrode Displacement')
        .set_attribute('Sensor off Body', 'Autosense not worn')
        .set_attribute('Battery down/Disconnected',
                       'No data is present - Can be due to battery down or sensor disconnection')
        .set_attribute('Intermittent Data Loss', 'Not enough samples are present')
        .set_attribute('Acceptable', 'Good Quality')
    )
    quality_metadata.add_dataDescriptor(quality_descriptor)

    sample_descriptor = (
        DataDescriptor()
        .set_name("ecg")
        .set_type("double")
        .set_attribute("description", "ecg sample value")
    )
    quality_metadata.add_dataDescriptor(sample_descriptor)

    quality_module = (
        ModuleMetadata()
        .set_name("ecg data quality")
        .set_attribute("url", "http://md2k.org/")
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    quality_metadata.add_module(quality_module)
    return quality_metadata
def get_metadata():
    """
    Build the metadata of the HRV feature stream.

    ``stream_name`` and ``rr_data`` are not parameters; they are resolved from
    the surrounding scope when the function runs.

    Returns:
        Metadata: stream metadata with one input stream, eleven "double" feature
        descriptors, a "window" struct descriptor and one module entry
    """
    # (column name, description); every feature is registered with type "double".
    feature_columns = (
        ("var", "variance"),
        ("iqr", "Inter Quartile Range"),
        ("mean", "Mean RR Interval"),
        ("median", "Median RR Interval"),
        ("80th", "80th percentile RR Interval"),
        ("20th", "20th percentile RR Interval"),
        ("heartrate", "Heart Rate in BPM"),
        ("vlf", "Very Low Frequency Energy"),
        ("lf", "Low Frequency Energy"),
        ("hf", "High Frequency Energy"),
        ("lfhf", "Low frequency to High Frequency energy ratio"),
    )

    hrv_metadata = Metadata()
    hrv_metadata.set_name(stream_name).set_description("HRV Features from ECG RR interval")
    hrv_metadata.add_input_stream(rr_data.metadata.get_name())

    for column, description in feature_columns:
        hrv_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type("double").set_attribute("description", description))

    window_descriptor = (
        DataDescriptor()
        .set_name("window")
        .set_type("struct")
        .set_attribute("description", "window start and end time in UTC")
        .set_attribute('start', 'start of window')
        .set_attribute('end', 'end of window')
    )
    hrv_metadata.add_dataDescriptor(window_descriptor)

    hrv_module = (
        ModuleMetadata()
        .set_name("HRV Features from ECG RR Interval")
        .set_attribute("url", "http://md2k.org/")
        .set_attribute('algorithm', 'ecg feature computation')
        .set_attribute('unit', 'ms')
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    hrv_metadata.add_module(hrv_module)
    return hrv_metadata
def gen_location_datastream(user_id, stream_name) -> object:
    """
    Create a datastream of randomly generated sample gps data (Memphis, TN, lat, long, alt coordinates).

    Five random center points are drawn inside a bounding box; for each of them
    150 rows are generated with gaussian noise around the center, one row per minute.

    Args:
        user_id (str): id of a user
        stream_name (str): sample gps stream name

    Returns:
        DataStream: datastream object of gps location stream with its metadata
    """
    columns = [
        "timestamp", "localtime", "user", "version", "latitude",
        "longitude", "altitude", "speed", "bearing", "accuracy",
    ]
    rows = []
    current_time = datetime(2019, 9, 1, 11, 34, 59)
    sql_context = get_or_create_sc("sqlContext")

    south_west = [35.079678, -90.074136]
    north_east = [35.194771, -89.868766]
    altitudes = list(range(83, 100))

    one_minute = timedelta(minutes=1)
    local_shift = timedelta(hours=5)

    for _cluster in range(5):
        center_lat = random.uniform(south_west[0], north_east[0])
        center_long = random.uniform(south_west[1], north_east[1])
        for _point in range(150):
            # The draw order below is kept fixed so a seeded generator
            # yields the same rows.
            latitude = random.gauss(center_lat, 0.001)
            longitude = random.gauss(center_long, 0.001)
            altitude = random.choice(altitudes)
            speed = round(random.uniform(0.0, 5.0), 6)
            bearing = round(random.uniform(0.0, 350), 6)
            accuracy = round(random.uniform(10.0, 30.4), 6)
            current_time = current_time + one_minute
            rows.append((
                current_time, current_time + local_shift, user_id, 1,
                latitude, longitude, altitude, speed, bearing, accuracy,
            ))

    frame = sql_context.createDataFrame(rows, columns)

    # (column name, description); every gps column is registered with type "float".
    gps_columns = (
        ("latitude", "gps latitude"),
        ("longitude", "gps longitude"),
        ("altitude", "gps altitude"),
        ("speed", "speed info"),
        ("bearing", "bearing info"),
        ("accuracy", "accuracy of gps location"),
    )
    gps_metadata = Metadata()
    gps_metadata.set_name(stream_name).set_description("GPS sample data stream.")
    for column, description in gps_columns:
        gps_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type("float").set_attribute("description", description))
    gps_metadata.add_module(
        ModuleMetadata()
        .set_name("examples.util.data_helper.gen_location_data")
        .set_attribute("attribute_key", "attribute_value")
        .set_author("Nasir Ali", "*****@*****.**"))

    # The result of the validity check is not inspected here.
    gps_metadata.is_valid()
    return DataStream(data=frame, metadata=gps_metadata)
def gen_location_datastream(user_id, stream_name) -> object:
    """
    Create a datastream of randomly generated sample gps data (Memphis, TN, lat, long, alt coordinates).

    500 rows are generated, one per minute; latitude, longitude and altitude
    are each picked independently from a fixed list of candidate values.

    Args:
        user_id (str): id of a user
        stream_name (str): sample gps stream name

    Returns:
        DataStream: datastream object of gps location stream with its metadata
    """
    columns = [
        "timestamp", "localtime", "user", "version", "latitude",
        "longitude", "altitude", "speed", "bearing", "accuracy",
    ]
    rows = []
    current_time = datetime(2019, 1, 9, 11, 34, 59)
    sql_context = get_or_create_sc("sqlContext")

    latitudes = [
        35.1247391, 35.1257391, 35.1217391, 35.1117391, 35.1317391,
        35.1287391, 35.5217391,
    ]
    longitudes = [
        -89.9750021, -89.9710021, -89.9800021, -89.9670021, -89.9790021,
        -89.9710021, -89.8700021,
    ]
    altitudes = [83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0]

    one_minute = timedelta(minutes=1)
    local_shift = timedelta(hours=5)

    for _point in range(500):
        # The draw order below is kept fixed so a seeded generator
        # yields the same rows.
        latitude = random.choice(latitudes)
        longitude = random.choice(longitudes)
        altitude = random.choice(altitudes)
        speed = round(random.uniform(0.0, 5.0), 6)
        bearing = round(random.uniform(0.0, 350), 6)
        accuracy = round(random.uniform(10.0, 30.4), 6)
        current_time = current_time + one_minute
        rows.append((
            current_time, current_time + local_shift, user_id, 1,
            latitude, longitude, altitude, speed, bearing, accuracy,
        ))

    frame = sql_context.createDataFrame(rows, columns)

    # (column name, description); every gps column is registered with type "float".
    gps_columns = (
        ("latitude", "gps latitude"),
        ("longitude", "gps longitude"),
        ("altitude", "gps altitude"),
        ("speed", "speed info"),
        ("bearing", "bearing info"),
        ("accuracy", "accuracy of gps location"),
    )
    gps_metadata = Metadata()
    gps_metadata.set_name(stream_name).set_version(1).set_description("GPS sample data stream.")
    for column, description in gps_columns:
        gps_metadata.add_dataDescriptor(
            DataDescriptor().set_name(column).set_type("float").set_attribute("description", description))
    gps_metadata.add_module(
        ModuleMetadata()
        .set_name("examples.util.data_helper.gen_location_data")
        .set_version("0.0.1")
        .set_attribute("attribute_key", "attribute_value")
        .set_author("test_user", "test_user@test_email.com"))

    # The result of the validity check is not inspected here.
    gps_metadata.is_valid()
    return DataStream(data=frame, metadata=gps_metadata)