def weather_and_operating_features_extraction_one_user(user_id):
    """Collect sample values of every record feature that is not a known one.

    Loads the user's records with ``DataExtractor.load_json_data``, passes
    them through ``DataOperations.filter_notifications`` and, for every record
    key that is not in the known-feature set, stores up to 100 example values
    in the module-level ``other_features`` dict. That dict is then handed to
    ``JsonUtils.save_json_data`` under a timestamped name and returned.

    Args:
        user_id: identifier given to ``DataExtractor.load_json_data``; it is
            also appended to the output file name.

    Returns:
        The module-level ``other_features`` dict (feature name -> list of
        sample values). It is not reset here, so samples from earlier calls
        are still present.
    """
    out_path_prefix = "/home/dehajjik/workspace/logs/"
    max_samples_per_feature = 100

    # A set gives constant-time membership tests and absorbs the
    # "settingInfo" entry that was listed twice.
    known_features = frozenset([
        "activityRecognitionResult",
        "androidActivityRecognitionResult",
        "appLaunch",
        "battery",
        "bluetooth",
        "event",
        "notifications",
        "headsetPlug",
        "location",
        "networkInfo",
        "sensor",
        "settingInfo",
        "telephony",
        "telephony2",
        "wifiAps",
        "wifiConnectedAp",
        "launcherLayouts",
        "predictors",
        "neighboringCellInfo2",
        "neighboringCellInfo",
    ])

    data = DataExtractor.load_json_data(user_id)

    # Per the original author's note, filter_notifications also adds the
    # createDateTimeZone and rcreateDateTimeZone attributes to the records.
    # TODO (from the original): check that the data is really ordered by date.
    data = DataOperations.filter_notifications(data)

    for record_id in data:
        record = data[record_id]
        for key, value in record.iteritems():
            if key in known_features:
                continue
            samples = other_features.setdefault(key, [])
            if len(samples) < max_samples_per_feature:
                samples.append(value)

    # Single clock read: the date and time parts can no longer disagree when
    # the call happens right at midnight.
    t = time.strftime("%Y%m%d%H%M%S")
    JsonUtils.save_json_data(
        out_path_prefix + "/" + t + "extra_features_u" + str(user_id),
        other_features)
    return other_features
def weather_and_operating_features_extraction_one_user(user_id):
    """Gather example values for record keys outside the known feature list.

    The user's records are loaded through ``DataExtractor.load_json_data`` and
    filtered by ``DataOperations.filter_notifications``. Each record key that
    is not a known feature gets at most 100 example values appended to the
    module-level ``other_features`` dict, which is finally written with
    ``JsonUtils.save_json_data`` to a timestamped file.

    Args:
        user_id: identifier passed to ``DataExtractor.load_json_data`` and
            used as a suffix of the output file name.

    Returns:
        The shared ``other_features`` dict, mapping each unknown feature name
        to its collected sample values. The dict is never cleared by this
        function.
    """
    out_path_prefix = "/home/dehajjik/workspace/logs/"
    sample_cap = 100

    # frozenset: O(1) lookups, and the duplicated "settingInfo" collapses.
    known_features = frozenset((
        "activityRecognitionResult", "androidActivityRecognitionResult",
        "appLaunch", "battery", "bluetooth", "event", "notifications",
        "headsetPlug", "location", "networkInfo", "sensor", "settingInfo",
        "telephony", "telephony2", "wifiAps", "wifiConnectedAp",
        "launcherLayouts", "predictors", "neighboringCellInfo2",
        "neighboringCellInfo",
    ))

    data = DataExtractor.load_json_data(user_id)

    # Original author's note: filter_notifications already adds the
    # createDateTimeZone and rcreateDateTimeZone attributes to the records.
    # TODO (from the original): check that the data is really ordered by date.
    data = DataOperations.filter_notifications(data)

    for record_id in data:
        for key, value in data[record_id].iteritems():
            if key not in known_features:
                bucket = other_features.setdefault(key, [])
                if len(bucket) < sample_cap:
                    bucket.append(value)

    # Format date and time from one clock read so the stamp is consistent
    # even if the call straddles midnight.
    t = time.strftime("%Y%m%d%H%M%S")
    JsonUtils.save_json_data(
        out_path_prefix + "/" + t + "extra_features_u" + str(user_id),
        other_features)
    return other_features
def filter_notifications_one_user(user_id):
    """Filter one user's notifications and persist the result.

    The user's data is loaded with ``DataExtractor.load_json_data``, passed
    through ``DataOperations.filter_notifications`` and saved with
    ``JsonUtils.save_json_data`` below the ``notifications_filtered``
    directory, in a sub-folder named after
    ``DataExtractor.user_long_ids[user_id]``.

    Args:
        user_id: key used both to load the data and to look up the long id.

    Returns:
        The value returned by ``DataOperations.filter_notifications``.
    """
    filtered = DataOperations.filter_notifications(
        DataExtractor.load_json_data(user_id))

    target = ("/speech/dbwork/mul/students/dehajjik/notifications_filtered/"
              + str(DataExtractor.user_long_ids[user_id])
              + "/all/all_in_one_validated_log")
    JsonUtils.save_json_data(target, filtered)
    return filtered
def filter_notifications_one_user(user_id):
    """Load a user's data, filter its notifications, save and return it.

    Args:
        user_id: identifier given to ``DataExtractor.load_json_data`` and
            used as index into ``DataExtractor.user_long_ids``.

    Returns:
        The data as returned by ``DataOperations.filter_notifications``; the
        same object is written with ``JsonUtils.save_json_data``.
    """
    raw = DataExtractor.load_json_data(user_id)
    result = DataOperations.filter_notifications(raw)

    # <prefix><long user id>/all/all_in_one_validated_log
    path_parts = [
        "/speech/dbwork/mul/students/dehajjik/notifications_filtered/",
        str(DataExtractor.user_long_ids[user_id]),
        "/all/all_in_one_validated_log",
    ]
    JsonUtils.save_json_data("".join(path_parts), result)
    return result
def output_sample(user_id):
    """Write the records of a few hand-picked hours of one user to files.

    For every (date, hour) registered for ``user_id`` the matching records
    are selected with ``DataExtractor.select_records_by_date_and_hour``,
    passed through ``DataOperations.order_chronologically_and_annotate``,
    saved with ``JsonUtils.save_json_data`` and their keys printed as JSON.

    Args:
        user_id: one of the user ids 1..7 listed below; any other value
            raises ``KeyError``.
    """
    # (year, month, day, hour) samples, keyed by user id.
    sample_hours = {
        1: [(2014, 8, 19, 16), (2014, 8, 27, 15), (2014, 9, 5, 18),
            (2014, 10, 12, 15), (2014, 9, 1, 1)],
        2: [(2014, 9, 25, 7), (2014, 12, 8, 6), (2014, 9, 25, 1)],
        3: [(2014, 9, 25, 17)],
        4: [(2014, 9, 5, 14), (2015, 1, 8, 11), (2014, 9, 2, 13)],
        5: [(2014, 9, 22, 18), (2015, 1, 5, 13), (2014, 12, 29, 13)],
        6: [(2014, 10, 26, 3), (2014, 11, 4, 8)],
        7: [(2014, 7, 28, 10)],
    }
    out_path_prefix = "/home/dehajjik/workspace/resources/filtered_notifs/"

    data = DataExtractor.load_json_data(user_id)

    # One output file per selected hour.
    for ymdh in sample_hours[user_id]:
        specific_dt = datetime.datetime(*ymdh)
        hour_records = DataOperations.order_chronologically_and_annotate(
            DataExtractor.select_records_by_date_and_hour(data, specific_dt))
        #hour_records = DataOperations.annotate(hour_records)
        file_name = (out_path_prefix + "u" + str(user_id) + " d"
                     + specific_dt.strftime('%Y-%m-%d %H'))
        JsonUtils.save_json_data(file_name, hour_records)
        print(str(json.dumps(hour_records.keys(), indent=4)))
def extract_specific_date_time_one_user(user_id):
    """Dump the records of a few hand-picked hours for one user.

    For each (date, hour) registered for ``user_id`` the matching records are
    selected with ``DataExtractor.select_records_by_date_and_hour``, each
    selected item is passed through the two per-record ``DataOperations``
    helpers, the selection is passed through
    ``DataOperations.order_chronologically_and_annotate`` and finally saved
    with ``JsonUtils.save_json_data``.

    Args:
        user_id: one of the user ids 1..7 listed below; any other value
            raises ``KeyError``.
    """
    # Earlier sample set, kept for reference as (year, month, day, hour):
    #   1: (2014, 8, 19, 16), (2014, 8, 27, 15), (2014, 9, 5, 18),
    #      (2014, 10, 12, 15), (2014, 9, 1, 1)
    #   2: (2014, 9, 25, 7), (2014, 12, 8, 6), (2014, 9, 25, 1)
    #   3: (2014, 9, 13, 0), (2014, 9, 25, 17)
    #   4: (2014, 9, 5, 14), (2015, 1, 8, 11), (2014, 9, 2, 13)
    #   5: (2014, 9, 22, 18), (2015, 1, 5, 13), (2014, 12, 29, 13)
    #   6: (2014, 10, 26, 3), (2014, 11, 4, 8)
    #   7: (2014, 7, 28, 10)
    hours_by_user = {
        1: [(2014, 10, 12, 14), (2014, 10, 12, 22)],
        2: [],
        3: [],
        4: [(2014, 12, 9, 15)],
        5: [(2014, 12, 5, 12), (2014, 12, 16, 9)],
        6: [],
        7: [],
    }
    out_path_prefix = "/home/dehajjik/workspace/resources/"

    data = DataExtractor.load_json_data(user_id)

    # One output file per selected hour.
    for ymdh in hours_by_user[user_id]:
        specific_dt = datetime.datetime(*ymdh)
        selected_data = DataExtractor.select_records_by_date_and_hour(
            data, specific_dt)

        # Annotate with readable dates, then sort the notifications.
        # NOTE(review): the helpers' return values are not stored anywhere
        # (the original only rebound the loop variable), so this has an
        # effect only if the helpers mutate their argument - confirm.
        for record in selected_data:
            DataOperations.order_chronologically_notifications_and_annotate(
                DataOperations.annotate_with_readable_date_no_timezone(record))

        # Sort the records themselves chronologically.
        selected_data = DataOperations.order_chronologically_and_annotate(
            selected_data)

        file_name = (out_path_prefix + "u" + str(user_id) + " d"
                     + specific_dt.strftime('%Y-%m-%d %H'))
        JsonUtils.save_json_data(file_name, selected_data)
def transform_to_categorized_data_one_user(user_id):
    """Run the selected feature transformers on one user's data and save it.

    A transformer of every available class is built from the loaded data;
    only the selected features are actually transformed. A feature's data and
    metadata are kept unless either of them equals ``{None: None}``. The
    result is written with ``JsonUtils.save_json_data`` into a folder named
    after ``DataExtractor.user_long_ids[user_id]``.

    Args:
        user_id: key used to load the data and to look up the long user id.

    Returns:
        dict with two entries, ``"data"`` and ``"metadata"``, each mapping a
        transformed feature name to what its transformer produced.
    """
    out_path_prefix = "/speech/dbwork/mul/students/dehajjik/categorized_data/"
    data_key = "data"
    metadata_key = "metadata"

    print("loading data for user "+str(user_id))
    raw_data = DataExtractor.load_json_data(user_id)
    #raw_data = JsonUtils.load_json_data("/home/dehajjik/workspace/resources/sample_data_for_location_categorization_test.json")

    # Every transformer is instantiated, in this order, even when its
    # feature is not selected below.
    transformer_classes = [
        LocationTransformer,
        NotificationTransformer,
        ApplaunchTransformer,
        BatteryTransformer,
        HeadsetTransformer,
        BluetoothPairedTransformer,
        BluetoothSeenTransformer,
        ActivityTransformer,
    ]
    feature_transformers = {
        cls.transformed_feature_name: cls(raw_data)
        for cls in transformer_classes
    }

    # Features to transform. Currently disabled: BatteryTransformer,
    # HeadsetTransformer, BluetoothSeenTransformer.
    selected_features = [
        LocationTransformer.transformed_feature_name,
        NotificationTransformer.transformed_feature_name,
        ApplaunchTransformer.transformed_feature_name,
        BluetoothPairedTransformer.transformed_feature_name,
        ActivityTransformer.transformed_feature_name,
    ]
    #selected_features = [ActivityTransformer.transformed_feature_name]

    categorized_data = {data_key: {}, metadata_key: {}}
    placeholder = {None: None}

    for feature in selected_features:
        transformer = feature_transformers[feature]
        transformer.transform()

        # Skip the feature when its data or its metadata is the placeholder.
        if (transformer.transformed_feature_data == placeholder
                or transformer.transformed_feature_metadata == placeholder):
            continue

        categorized_data[data_key][feature] = transformer.transformed_feature_data
        categorized_data[metadata_key][feature] = transformer.transformed_feature_metadata

    destination = (out_path_prefix
                   + str(DataExtractor.user_long_ids[user_id])
                   + "/all/all_in_one_validated_log")
    JsonUtils.save_json_data(destination, categorized_data)
    return categorized_data
def output_sample(user_id):
    """Save, for one user, the records falling into a few fixed hours.

    Each registered hour yields one file: the records returned by
    ``DataExtractor.select_records_by_date_and_hour`` are passed through
    ``DataOperations.order_chronologically_and_annotate``, saved with
    ``JsonUtils.save_json_data``, and their keys are printed as JSON.

    Args:
        user_id: user id between 1 and 7; other values raise ``KeyError``.
    """
    def at(year, month, day, hour):
        # Shorthand for the hour-resolution timestamps below.
        return datetime.datetime(year=year, month=month, day=day, hour=hour)

    specific_date_times = {
        1: [at(2014, 8, 19, 16), at(2014, 8, 27, 15), at(2014, 9, 5, 18),
            at(2014, 10, 12, 15), at(2014, 9, 1, 1)],
        2: [at(2014, 9, 25, 7), at(2014, 12, 8, 6), at(2014, 9, 25, 1)],
        3: [at(2014, 9, 25, 17)],
        4: [at(2014, 9, 5, 14), at(2015, 1, 8, 11), at(2014, 9, 2, 13)],
        5: [at(2014, 9, 22, 18), at(2015, 1, 5, 13), at(2014, 12, 29, 13)],
        6: [at(2014, 10, 26, 3), at(2014, 11, 4, 8)],
        7: [at(2014, 7, 28, 10)],
    }
    out_path_prefix = "/home/dehajjik/workspace/resources/filtered_notifs/"

    data = DataExtractor.load_json_data(user_id)

    # Write the data of each selected date and hour to its own file.
    for moment in specific_date_times[user_id]:
        chosen = DataExtractor.select_records_by_date_and_hour(data, moment)
        chosen = DataOperations.order_chronologically_and_annotate(chosen)
        #chosen = DataOperations.annotate(chosen)
        JsonUtils.save_json_data(
            out_path_prefix + "u" + str(user_id) + " d"
            + moment.strftime('%Y-%m-%d %H'),
            chosen)
        print(str(json.dumps(chosen.keys(), indent=4)))
def write(content, file_suffix):
    """Save ``content`` to a timestamped file in the log directory.

    The file name is ``LogsFileWriter.log_dir`` followed by the current local
    time formatted as ``YYYYmmddHHMMSS`` and then ``file_suffix``. The content
    is handed to ``JsonUtils.save_json_data``.

    Args:
        content: object passed unchanged to ``JsonUtils.save_json_data``.
        file_suffix: string appended after the timestamp.
    """
    # One strftime call instead of two: with separate clock reads the date
    # and the time part could belong to different days around midnight.
    t = time.strftime("%Y%m%d%H%M%S")
    log_file_name = LogsFileWriter.log_dir + t + file_suffix
    JsonUtils.save_json_data(log_file_name, content)
def write(content, file_suffix):
    """Write ``content`` to ``<log_dir><timestamp><file_suffix>``.

    ``log_dir`` is read from ``LogsFileWriter.log_dir``; the timestamp is the
    current local time as ``YYYYmmddHHMMSS``. Saving is delegated to
    ``JsonUtils.save_json_data``.

    Args:
        content: object forwarded to ``JsonUtils.save_json_data``.
        file_suffix: trailing part of the file name.
    """
    # Build the stamp from a single clock read so that its date and time
    # halves are always consistent (two reads can straddle midnight).
    stamp = time.strftime("%Y%m%d%H%M%S")
    JsonUtils.save_json_data(
        LogsFileWriter.log_dir + stamp + file_suffix, content)
import json
import collections
# sys and time are used below but were not imported in this snippet;
# importing them again is harmless if an earlier part of the file did so.
import sys
import time

sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from json_utils import JsonUtils
from data_utils import *

# Feature name -> list of records in which that feature was seen.
# Shared by all calls of the extraction function below.
other_features = {}


def weather_and_operating_features_extraction_one_user(user_id):
    """Record every occurrence of a feature that is not in the known list.

    Loads the user's records with ``DataExtractor.load_json_data``, passes
    them through ``DataOperations.filter_notifications`` and, for each record
    key that is not a known feature, appends the whole record to the
    module-level ``other_features`` dict and prints the key. The dict is then
    saved with ``JsonUtils.save_json_data`` to a timestamped file inside the
    folder named after ``DataExtractor.user_long_ids[user_id]``.

    Args:
        user_id: key used to load the data and to look up the long user id.

    Returns:
        The module-level ``other_features`` dict; it is not reset between
        calls.
    """
    out_path_prefix = "/home/dehajjik/workspace/logs/"

    # Set membership is O(1) and absorbs the duplicated "settingInfo".
    known_features = frozenset([
        "activityRecognitionResult",
        "androidActivityRecognitionResult",
        "appLaunch",
        "battery",
        "bluetooth",
        "event",
        "notifications",
        "headsetPlug",
        "location",
        "networkInfo",
        "sensor",
        "settingInfo",
        "telephony",
        "telephony2",
        "wifiAps",
        "wifiConnectedAp",
        "launcherLayouts",
        "predictors",
    ])

    data = DataExtractor.load_json_data(user_id)

    # Original author's note: filter_notifications already adds the
    # createDateTimeZone and rcreateDateTimeZone attributes to the records.
    # TODO (from the original): check that the data is really ordered by date.
    data = DataOperations.filter_notifications(data)

    for record_id in data:
        record = data[record_id]
        for key in record:
            # The original line lacked the `if` keyword (a syntax error).
            if key not in known_features:
                # NOTE(review): the collapsed source does not show whether
                # append/print ran for every occurrence or only the first
                # time a key was seen; every occurrence is assumed - confirm.
                other_features.setdefault(key, []).append(record)
                print(key+"\n")

    # Single clock read keeps the date and time halves consistent.
    t = time.strftime("%Y%m%d%H%M%S")
    JsonUtils.save_json_data(
        out_path_prefix + str(DataExtractor.user_long_ids[user_id]) + "/" + t
        + "extra_features_u" + str(user_id),
        other_features)
    return other_features
def _drop_blacklisted_elements(elements, rules):
    """Return the elements of an array feature that match no blacklist rule.

    ``rules`` maps an attribute name to the collection of forbidden values;
    an element is dropped when any of its listed attributes has a forbidden
    value.
    """
    return [
        element for element in elements
        if not any(element[attribute] in black_values
                   for attribute, black_values in rules.iteritems())
    ]


def clean_and_write_data_one_user(user_id):
    """Rebuild the module-level clean data for one user and save it.

    Resets the module-level accumulators, loads the user's records, filters
    the notifications, removes blacklisted realizations and feeds every
    remaining feature value to its ``<feature>_update`` function. Afterwards
    ``order_data`` and ``do_sanity_check`` are called and ``clean_data`` is
    saved with ``JsonUtils.save_json_data`` in the folder named after
    ``DataExtractor.user_long_ids[user_id]``.

    Args:
        user_id: key used to load the data and to look up the long user id.

    Returns:
        The module-level ``clean_data`` dict.

    Raises:
        KeyError: if a record has no ``record['event']['type']`` entry.
    """
    # Empty the module-level accumulators.
    global clean_data
    clean_data = {}
    global last_realization_val
    last_realization_val = {}
    global last_realization_key
    last_realization_key = {}

    # For each feature, we want to know the distribution of the time
    # difference observed between two realizations. For that reason
    # time_variances stores, for each feature, the number of times that the
    # difference between two realizations was x minutes. Note that the
    # maximum time variance allowed is timeout_in_minutes.
    global time_variances
    time_variances = {}

    out_path_prefix = "/speech/dbwork/mul/students/dehajjik/clean_data/"

    data = DataExtractor.load_json_data(user_id)
    DataOperations.print_times_for_specific_locations(data)

    # Original author's note: filter_notifications already adds the
    # createDateTimeZone and rcreateDateTimeZone attributes to the records.
    # TODO (from the original): check that the data is really ordered by date.
    data = DataOperations.filter_notifications(data)

    for record_id in data:
        record = data[record_id]
        event_type = record['event']['type']

        for key, value in record.iteritems():
            do_ignore = False

            if key in blacklisted_values:
                rules = blacklisted_values[key]
                is_array_attribute = False

                for attribute, black_values in rules.iteritems():
                    try:
                        if value[attribute] in black_values:
                            # This realization holds a blacklisted value.
                            do_ignore = True
                    except TypeError:
                        # value cannot be indexed by attribute name: it is
                        # an array feature and is handled element-wise below.
                        is_array_attribute = True
                        break

                if is_array_attribute:
                    # Drop only the blacklisted elements. The previous code
                    # never reset its removal flag, so every element after
                    # the first blacklisted one was dropped as well.
                    value = _drop_blacklisted_elements(value, rules)
                    if not value:
                        # All elements were removed, so ignore this entry.
                        do_ignore = True

            if do_ignore:
                continue

            if key == "activityRecognitionResult":
                activityRecognitionResult_update(value, event_type)
            elif key == "androidActivityRecognitionResult":
                androidActivityRecognitionResult_update(value, event_type)
            elif key == "appLaunch":
                appLaunch_update(value, event_type)
            elif key == "battery":
                battery_update(value, event_type)
            elif key == "bluetooth":
                bluetooth_update(value, event_type)
            elif key == "event":
                event_update(value, event_type)
            elif key == "notifications":
                notifications_update(value, event_type)
            elif key == "headsetPlug":
                headsetPlug_update(value, event_type)
            elif key == "location":
                location_update(value, event_type)
            elif key == "networkInfo":
                networkInfo_update(value, event_type)
            elif key == "sensor":
                sensor_update(value, event_type)
            elif key == "settingInfo":
                settingInfo_update(value, event_type)
            elif key == "telephony" or key == "telephony2":
                telephony_update(value, event_type)
            elif key == "wifiAps":
                wifiApps_update(value, event_type)
            elif key == "wifiConnectedAp":
                wifiConnectedApp_update(value, event_type)
            # Disabled debugging branch from the original:
            # elif key not in blacklisted_features:
            #     print key+"\n"

    order_data()

    # Do some tests to see that the transformation went well.
    do_sanity_check(data, clean_data)

    JsonUtils.save_json_data(
        out_path_prefix + str(DataExtractor.user_long_ids[user_id])
        + "/all/all_in_one_validated_log",
        clean_data)
    return clean_data
"seq":[23] } }, "wifiAps": {"2015-01-01 00:55:00 , 1420073700000->2015-01-01 01:10:01 , 1420074601000":[ {"ssid":"w1", "seq":[2,3,4]} ], "2015-01-01 01:10:01 , 1420074601000->2015-01-01 02:00:00 , 1420077600000":[ {"ssid":"w1", "seq":[5,6,7,8]}, {"ssid":"w2", "seq":[5,6,7,8]}, ], "2015-01-01 03:00:01 , 1420081201000->2015-01-01 03:05:00 , 1420081500000":[ {"ssid":"w3", "seq":[9]} ], "2015-01-01 04:05:00 , 1420085100000->2015-01-01 05:00:00 , 1420088400000":[ {"ssid":"w2", "seq":[10,11]}, {"ssid":"w3", "seq":[10,11]} ], "2015-01-01 19:15:00 , 1420139700000->2015-01-01 21:00:00 , 1420146000000":[ {"ssid":"w4", "seq": [21,22]} ] } } JsonUtils.save_json_data( "/home/dehajjik/workspace/resources/sample_data_for_location_categorization_test", data)