def generate_metadata_dailystats():
    """Build the metadata for the 'mcontain-md2k--daily-stats' stream.

    Returns:
        Metadata: metadata carrying six data descriptors and one module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("start_time", "timestamp", "Start time of the day in localtime"),
        ("end_time", "timestamp", "End time of the day in localtime"),
        ("number_of_app_users", "double", "Total number of app users"),
        ("encounter_per_user", "double", "Average encounter per user"),
        ("total_covid_encounters", "double", "Total covid encounters on the day"),
        ("maximum_concurrent_encounters", "double", "Maximum concurrent encounters"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k--daily-stats')
    builder = builder.set_description('Daily stats for website')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name('Daily encounter stats for all the users to be shown in website')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Azim Ullah", "*****@*****.**")
    stream_metadata.add_module(module)
    return stream_metadata
示例#2
0
def generate_metadata_hourly():
    """Build the metadata for the 'mcontain-md2k--visualization-stats--time-window' stream.

    Returns:
        Metadata: metadata carrying eight data descriptors and one module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("n_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        ("normalized_total_encounters", "double",
         "Total encounters normalized by the centroid area. (encounters per 10 square meter)"),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k--visualization-stats--time-window')
    builder = builder.set_description(
        'Computes visualization stats every time window defined by start time and end time')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name(
        'Visualization stats computation in a time window between start time and end time')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Azim Ullah", "*****@*****.**")
    stream_metadata.add_module(module)
    return stream_metadata
def get_metadata():
    """Build the metadata for a decoded MotionsenseHRV stream.

    The stream name is a placeholder string that the caller of this template
    is expected to replace. Nine ``float`` columns are described (three PPG
    LED channels, three accelerometer axes, three gyroscope axes) and one
    module entry is attached.

    Returns:
        Metadata: the populated metadata object.
    """
    stream_name = 'fill in your stream name'

    # (column name, description attribute); every column is typed "float"
    columns = (
        ("red", "Value of Red LED - PPG"),
        ("infrared", "Value of Infrared LED - PPG"),
        ("green", "Value of Green LED - PPG"),
        ("aclx", "Wrist Accelerometer X-axis"),
        ("acly", "Wrist Accelerometer Y-axis"),
        ("aclz", "Wrist Accelerometer Z-axis"),
        ("gyrox", "Wrist Gyroscope X-axis"),
        ("gyroy", "Wrist Gyroscope Y-axis"),
        ("gyroz", "Wrist Gyroscope Z-axis"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name(stream_name)
    builder = builder.set_description(
        "Sequence Aligment, Timestamp Correction and Decoding of MotionsenseHRV")
    for col_name, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type("float")
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name("cerebralcortex.algorithms.raw_byte_decode.motionsenseHRV.py")
    # The URL scheme was previously misspelled as "hhtps", which is not a valid scheme.
    module = module.set_attribute("url", "https://md2k.org")
    module = module.set_author("Md Azim Ullah", "*****@*****.**")
    builder.add_module(module)
    return stream_metadata
def generate_metadata_notif():
    """Build the metadata for the 'mcontain-md2k--user-notifications' stream.

    Returns:
        Metadata: metadata carrying six data descriptors and one versioned module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("user", "string", "user id"),
        ("timestamp", "timestamp", "Unix timestamp when the message was generated"),
        ("localtime", "timestamp", "Local timestamp when the message was generated."),
        ("message", "string", "Generated notification message"),
        ("day", "timestamp", "day of the encounter"),
        ("version", "int", "version"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k--user-notifications')
    builder = builder.set_description('Notification generated for the Covid-19 encountered users.')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name(
        'Generated notification for a user encountered with Covid-19 participant')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Shiplu Hawlader", "*****@*****.**")
    module = module.set_version(1)
    stream_metadata.add_module(module)
    return stream_metadata
def generate_metadata_encounter_daily():
    """Build the metadata for the 'mcontain-md2k-encounter-daily--bluetooth-gps' stream.

    Returns:
        Metadata: metadata carrying eight data descriptors and one module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("start_time", "timestamp", "Start time of the encounter in localtime"),
        ("end_time", "timestamp", "End time of the encounter in localtime"),
        ("participant_identifier", "string", "Participant with whom encounter happened"),
        ("os", "string", "Operating system of the phone belonging to user"),
        ("latitude", "double", "Latitude of encounter location"),
        ("longitude", "double", "Longitude of encounter location"),
        # NOTE(review): a column named "durations" is described as a mean
        # distance -- possibly a copy-paste leftover; confirm the intended text.
        ("durations", "array", "Mean distance between participants in encounter"),
        ("covid", "integer",
         "0, 1 or 2 indicating if this encounter contained a covid user -- 0 - no covid-19 affected, 1 - user is, 2 - participant identifier is"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k-encounter-daily--bluetooth-gps')
    builder = builder.set_description(
        'Contains each unique encounters between two persons along with the location of encounter')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name(
        'Encounter computation after parsing raw bluetooth-gps data, clustering gps locations and removing double counting')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Azim Ullah", "*****@*****.**")
    stream_metadata.add_module(module)
    return stream_metadata
def gen_phone_battery_metadata(stream_name) -> Metadata:
    """
    Assemble sample metadata describing a phone battery stream.

    Args:
        stream_name: name assigned to the generated metadata

    Returns:
        Metadata: metadata of phone battery stream
    """
    stream_metadata = Metadata()

    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name(stream_name)
    builder = builder.set_version(1)
    builder = builder.set_description("mobile phone battery sample data stream.")

    level = DataDescriptor().set_name("level").set_type("float")
    level = level.set_attribute("description", "current battery charge")
    builder = builder.add_dataDescriptor(level)

    module = ModuleMetadata().set_name("battery").set_version("1.2.4")
    module = module.set_attribute("attribute_key", "attribute_value")
    module = module.set_author("test_user", "test_user@test_email.com")
    builder.add_module(module)

    # The result of the validity check is not inspected here.
    stream_metadata.is_valid()
    return stream_metadata
def generate_metadata_user_encounter_count():
    """Build the metadata for the 'mcontain-md2k--user--encounter-count' stream.

    Returns:
        Metadata: metadata carrying three data descriptors and one versioned module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("start_time", "timestamp", "Start time of the time window in localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("encounter_count", "int",
         "Total number of encounter for the user in the given time window"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k--user--encounter-count')
    builder = builder.set_description('Number of encounter in a given time window')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name('Total number of encounter for a user in a given time window')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Shiplu Hawlader, Md Azim Ullah", "[email protected], [email protected]")
    module = module.set_version(1)
    stream_metadata.add_module(module)
    return stream_metadata
示例#8
0
 def get_metadata():
     """Build the metadata for the stress-episodes stream.

     ``stream_name`` and ``ecg_stress_probability`` are not defined in this
     function; they are resolved from the surrounding scope.

     Returns:
         Metadata: metadata carrying six data descriptors and one module entry.
     """
     # (column name, column type, description attribute or None when none is set)
     columns = (
         ("timestamp", "datetime", None),
         ("localtime", "datetime", None),
         ("version", "int", None),
         ("user", "string", None),
         ("stress_probability", "float", None),
         ("stress_episode", "string", "stress episodes calculated using MACD"),
     )

     stream_metadata = Metadata()
     # Each call is made on the previous call's return value, exactly like a fluent chain.
     builder = stream_metadata.set_name(stream_name)
     builder = builder.set_description("Stress episodes computed using MACD formula.")
     builder = builder.add_input_stream(ecg_stress_probability.metadata.get_name())
     for col_name, col_type, col_desc in columns:
         descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
         if col_desc is not None:
             descriptor = descriptor.set_attribute("description", col_desc)
         builder = builder.add_dataDescriptor(descriptor)

     module = ModuleMetadata().set_name(
         "cerebralcortex.algorithm.stress_prediction.stress_episodes.compute_stress_episodes")
     module = module.set_attribute("url", "http://md2k.org/")
     module = module.set_author("Anandatirtha Nandugudi", "*****@*****.**")
     builder.add_module(module)
     return stream_metadata
    def get_metadata():
        """
        Build the metadata for the ECG RR-interval stream.

        ``stream_name`` and ``ecg_data`` are not defined in this function;
        they are resolved from the surrounding scope.

        Returns:
            MetaData object
        """
        stream_metadata = Metadata()

        # Each call is made on the previous call's return value, exactly like a fluent chain.
        builder = stream_metadata.set_name(stream_name)
        builder = builder.set_description("ECG RR interval in milliseconds")
        builder = builder.add_input_stream(ecg_data.metadata.get_name())

        rr_descriptor = DataDescriptor().set_name("rr").set_type("float")
        rr_descriptor = rr_descriptor.set_attribute("description","rr interval")
        builder = builder.add_dataDescriptor(rr_descriptor)

        module = ModuleMetadata().set_name("ecg rr interval")
        # Attributes are applied in this order: url, algorithm, unit.
        for key, value in (("url", "http://md2k.org/"),
                           ('algorithm', 'pan-tomkins'),
                           ('unit', 'ms')):
            module = module.set_attribute(key, value)
        module = module.set_author("Md Azim Ullah", "*****@*****.**")
        builder.add_module(module)
        return stream_metadata
示例#10
0
 def get_metadata():
     """Build the metadata for the ECG stress-likelihood stream.

     ``stream_name`` and ``stress_features_normalized`` are not defined in
     this function; they are resolved from the surrounding scope.

     Returns:
         Metadata: metadata carrying two data descriptors and one module entry.
     """
     # (column name, column type, ordered (attribute key, attribute value) pairs)
     columns = (
         ("stress_probability", "double", (
             ("description", "stress likelihood computed from ECG only model"),
             ("threshold", "0.47"),
         )),
         ("window", "struct", (
             ("description", "window start and end time in UTC"),
             ('start', 'start of 1 minute window'),
             ('end', 'end of 1 minute window'),
         )),
     )

     stream_metadata = Metadata()
     # Each call is made on the previous call's return value, exactly like a fluent chain.
     builder = stream_metadata.set_name(stream_name)
     builder = builder.set_description("stress likelihood computed from ECG")
     builder = builder.add_input_stream(stress_features_normalized.metadata.get_name())
     for col_name, col_type, attributes in columns:
         descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
         for key, value in attributes:
             descriptor = descriptor.set_attribute(key, value)
         builder = builder.add_dataDescriptor(descriptor)

     module = ModuleMetadata().set_name("ECG Stress Model")
     for key, value in (("url", "http://md2k.org/"),
                        ('algorithm', 'cStress'),
                        ('unit', 'ms')):
         module = module.set_attribute(key, value)
     module = module.set_author("Md Azim Ullah", "*****@*****.**")
     builder.add_module(module)
     return stream_metadata
def generate_metadata_notification_daily():
    """Build the metadata for the 'mcontain-md2k--crowd--notification--daily' stream.

    Returns:
        Metadata: metadata carrying ten data descriptors and one module entry.
    """
    # (column name, column type, description attribute)
    columns = (
        ("start_time", "timestamp", "Start time of the time window localtime"),
        ("end_time", "timestamp", "End time of the time window in localtime"),
        ("centroid_latitude", "double",
         "Latitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_longitude", "double",
         "Longitude of centroid location, a gps cluster output grouping encounters in similar location together"),
        ("centroid_area", "double", "area of centroid"),
        ("durations", "double", "duration of stay in the centroid in hours"),
        ("unique_users", "integer", "Number of unique users in that cluster centroid"),
        ("total_encounters", "double",
         "Total encounters happening in the time window in this specific location"),
        ("normalized_total_encounters", "double",
         "Total encounters normalized by the centroid area. (encounters per 10 square meter)"),
        ("avg_encounters", "double",
         "average encounter per participant(participants who had at least one encounter)"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name('mcontain-md2k--crowd--notification--daily')
    builder = builder.set_description(
        'Computes notifications for each user who dwelled in a crowded hotspot')
    for col_name, col_type, col_desc in columns:
        descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name('Notification messages to be shown to each user')
    module = module.set_attribute("url", "https://mcontain.md2k.org")
    module = module.set_author("Md Azim Ullah", "*****@*****.**")
    stream_metadata.add_module(module)
    return stream_metadata
def get_metadata(stress_imputed_data, output_stream_name, input_stream_name):
    """
    Generate metadata for the stress-imputed datastream.

    One data descriptor is added per field of ``stress_imputed_data.schema``,
    using the field's name and the string form of its data type.

    Args:
        stress_imputed_data (DataStream): data whose ``schema.fields`` drive the descriptors
        output_stream_name (str): name assigned to the generated metadata
        input_stream_name (str): name recorded as the input stream

    Returns:
        Metadata: metadata describing the output stream
    """
    schema = stress_imputed_data.schema
    stream_metadata = Metadata()
    stream_metadata.set_name(output_stream_name).set_description("stress imputed")\
        .add_input_stream(input_stream_name)
    # Mirror the schema: one descriptor per field, in schema order.
    for field in schema.fields:
        stream_metadata.add_dataDescriptor(DataDescriptor().set_name(
            str(field.name)).set_type(str(field.dataType)))
    # The URL scheme was previously misspelled as "hhtps", which is not a valid scheme.
    stream_metadata.add_module(
        ModuleMetadata().set_name("stress forward fill imputer") \
            .set_attribute("url", "https://md2k.org").set_author(
            "Md Azim Ullah", "*****@*****.**"))
    return stream_metadata
示例#13
0
 def get_metadata():
     """Build the metadata for the chest ECG quality stream.

     ``stream_name`` and ``ecg`` are not defined in this function; they are
     resolved from the surrounding scope.

     Returns:
         Metadata: metadata carrying six data descriptors and one module entry.
     """
     # Attributes attached to the "quality" column, in the order they are set.
     quality_attributes = (
         ("description", "ECG data quality"),
         ('Loose/Improper Attachment', 'Electrode Displacement'),
         ('Sensor off Body', 'Autosense not worn'),
         ('Battery down/Disconnected',
          'No data is present - Can be due to battery down or sensor disconnection'),
         ('Intermittent Data Loss', 'Not enough samples are present'),
         ('Acceptable', 'Good Quality'),
     )
     # (column name, column type, ordered (attribute key, attribute value) pairs)
     columns = (
         ("timestamp", "datetime", ()),
         ("localtime", "datetime", ()),
         ("version", "int", ()),
         ("user", "string", ()),
         ("quality", "string", quality_attributes),
         ("ecg", "double", (("description", "ecg sample value"),)),
     )

     stream_metadata = Metadata()
     # Each call is made on the previous call's return value, exactly like a fluent chain.
     builder = stream_metadata.set_name(stream_name)
     builder = builder.set_description("Chest ECG quality 3 seconds")
     builder = builder.add_input_stream(ecg.metadata.get_name())
     for col_name, col_type, attributes in columns:
         descriptor = DataDescriptor().set_name(col_name).set_type(col_type)
         for key, value in attributes:
             descriptor = descriptor.set_attribute(key, value)
         builder = builder.add_dataDescriptor(descriptor)

     module = ModuleMetadata().set_name("ecg data quality")
     module = module.set_attribute("url", "http://md2k.org/")
     module = module.set_author("Md Azim Ullah", "*****@*****.**")
     builder.add_module(module)
     return stream_metadata
 def get_metadata():
     """Build the metadata for the HRV-features stream.

     ``stream_name`` and ``rr_data`` are not defined in this function; they
     are resolved from the surrounding scope.

     Returns:
         Metadata: metadata carrying twelve data descriptors (eleven "double"
         feature columns plus a "window" struct) and one module entry.
     """
     # (feature column name, description attribute); every feature is typed "double"
     features = (
         ("var", "variance"),
         ("iqr", "Inter Quartile Range"),
         ("mean", "Mean RR Interval"),
         ("median", "Median RR Interval"),
         ("80th", "80th percentile RR Interval"),
         ("20th", "20th percentile RR Interval"),
         ("heartrate", "Heart Rate in BPM"),
         ("vlf", "Very Low Frequency Energy"),
         ("lf", "Low Frequency Energy"),
         ("hf", "High Frequency Energy"),
         ("lfhf", "Low frequency to High Frequency energy ratio"),
     )

     stream_metadata = Metadata()
     # Each call is made on the previous call's return value, exactly like a fluent chain.
     builder = stream_metadata.set_name(stream_name)
     builder = builder.set_description("HRV Features from ECG RR interval")
     builder = builder.add_input_stream(rr_data.metadata.get_name())
     for feature_name, feature_desc in features:
         descriptor = DataDescriptor().set_name(feature_name).set_type("double")
         descriptor = descriptor.set_attribute("description", feature_desc)
         builder = builder.add_dataDescriptor(descriptor)

     # The window column is added last and carries three attributes.
     window = DataDescriptor().set_name("window").set_type("struct")
     for key, value in (("description", "window start and end time in UTC"),
                        ('start', 'start of window'),
                        ('end', 'end of window')):
         window = window.set_attribute(key, value)
     builder = builder.add_dataDescriptor(window)

     module = ModuleMetadata().set_name("HRV Features from ECG RR Interval")
     for key, value in (("url", "http://md2k.org/"),
                        ('algorithm', 'ecg feature computation'),
                        ('unit', 'ms')):
         module = module.set_attribute(key, value)
     module = module.set_author("Md Azim Ullah", "*****@*****.**")
     builder.add_module(module)
     return stream_metadata
def gen_location_datastream(user_id, stream_name) -> object:
    """
    Build a sample GPS datastream made of 5 random location clusters.

    Each cluster centre is drawn uniformly inside a fixed bounding box and
    150 points are then drawn around it (gaussian, sigma 0.001), giving 750
    rows in total. Consecutive rows are one minute apart.

    Args:
        user_id (str): id of a user
        stream_name (str): sample gps stream name

    Returns:
        DataStream: datastream object of gps location stream with its metadata

    """
    columns = [
        "timestamp", "localtime", "user", "version", "latitude", "longitude",
        "altitude", "speed", "bearing", "accuracy"
    ]
    rows = []
    current_ts = datetime(2019, 9, 1, 11, 34, 59)
    sqlContext = get_or_create_sc("sqlContext")

    # Bounding box corners as (latitude, longitude).
    lat_min, long_min = 35.079678, -90.074136
    lat_max, long_max = 35.194771, -89.868766
    altitudes = list(range(83, 100))

    for _cluster in range(5):
        center_lat = random.uniform(lat_min, lat_max)
        center_long = random.uniform(long_min, long_max)
        for _sample in range(150):
            # The order of the random draws below is kept fixed so that a
            # seeded run yields the same rows.
            lat_val = random.gauss(center_lat, 0.001)
            long_val = random.gauss(center_long, 0.001)
            alt_val = random.choice(altitudes)

            speed_val = round(random.uniform(0.0, 5.0), 6)
            bearing_val = round(random.uniform(0.0, 350), 6)
            accuracy_val = round(random.uniform(10.0, 30.4), 6)

            current_ts = current_ts + timedelta(minutes=1)
            # localtime is stored as a fixed +5 hour offset from timestamp.
            local_ts = current_ts + timedelta(hours=5)
            rows.append(
                (current_ts, local_ts, user_id, 1, lat_val, long_val, alt_val,
                 speed_val, bearing_val, accuracy_val))

    df = sqlContext.createDataFrame(rows, columns)

    # (column name, description attribute); every column is typed "float"
    described_columns = (
        ("latitude", "gps latitude"),
        ("longitude", "gps longitude"),
        ("altitude", "gps altitude"),
        ("speed", "speed info"),
        ("bearing", "bearing info"),
        ("accuracy", "accuracy of gps location"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name(stream_name)
    builder = builder.set_description("GPS sample data stream.")
    for col_name, col_desc in described_columns:
        descriptor = DataDescriptor().set_name(col_name).set_type("float")
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name("examples.util.data_helper.gen_location_data")
    module = module.set_attribute("attribute_key", "attribute_value")
    module = module.set_author("Nasir Ali", "*****@*****.**")
    builder.add_module(module)
    stream_metadata.is_valid()

    return DataStream(data=df, metadata=stream_metadata)
def gen_location_datastream(user_id, stream_name) -> object:
    """
    Build a sample GPS datastream of 500 rows picked from fixed coordinate lists.

    Latitude, longitude and altitude are each chosen independently from a
    small fixed list; speed, bearing and accuracy are drawn uniformly and
    rounded to 6 decimals. Consecutive rows are one minute apart.

    Args:
        user_id (str): id of a user
        stream_name (str): sample gps stream name

    Returns:
        DataStream: datastream object of gps location stream with its metadata

    """
    columns = [
        "timestamp", "localtime", "user", "version", "latitude", "longitude",
        "altitude", "speed", "bearing", "accuracy"
    ]
    rows = []
    current_ts = datetime(2019, 1, 9, 11, 34, 59)
    sqlContext = get_or_create_sc("sqlContext")
    latitudes = [
        35.1247391, 35.1257391, 35.1217391, 35.1117391, 35.1317391, 35.1287391,
        35.5217391
    ]
    longitudes = [
        -89.9750021, -89.9710021, -89.9800021, -89.9670021, -89.9790021,
        -89.9710021, -89.8700021
    ]
    altitudes = [83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0]
    for _sample in range(500):
        # The order of the random draws below is kept fixed so that a seeded
        # run yields the same rows.
        lat_val = random.choice(latitudes)
        long_val = random.choice(longitudes)
        alt_val = random.choice(altitudes)
        speed_val = round(random.uniform(0.0, 5.0), 6)
        bearing_val = round(random.uniform(0.0, 350), 6)
        accuracy_val = round(random.uniform(10.0, 30.4), 6)
        current_ts = current_ts + timedelta(minutes=1)
        # localtime is stored as a fixed +5 hour offset from timestamp.
        local_ts = current_ts + timedelta(hours=5)
        rows.append(
            (current_ts, local_ts, user_id, 1, lat_val, long_val, alt_val,
             speed_val, bearing_val, accuracy_val))

    df = sqlContext.createDataFrame(rows, columns)

    # (column name, description attribute); every column is typed "float"
    described_columns = (
        ("latitude", "gps latitude"),
        ("longitude", "gps longitude"),
        ("altitude", "gps altitude"),
        ("speed", "speed info"),
        ("bearing", "bearing info"),
        ("accuracy", "accuracy of gps location"),
    )

    stream_metadata = Metadata()
    # Each call is made on the previous call's return value, exactly like a fluent chain.
    builder = stream_metadata.set_name(stream_name)
    builder = builder.set_version(1)
    builder = builder.set_description("GPS sample data stream.")
    for col_name, col_desc in described_columns:
        descriptor = DataDescriptor().set_name(col_name).set_type("float")
        descriptor = descriptor.set_attribute("description", col_desc)
        builder = builder.add_dataDescriptor(descriptor)

    module = ModuleMetadata().set_name("examples.util.data_helper.gen_location_data")
    module = module.set_version("0.0.1")
    module = module.set_attribute("attribute_key", "attribute_value")
    module = module.set_author("test_user", "test_user@test_email.com")
    builder.add_module(module)
    stream_metadata.is_valid()

    return DataStream(data=df, metadata=stream_metadata)