# Example #1
# 0
def features_by_day_one_user(user_id):
	"""Count, per day, how many records of each tracked feature a user has.

	Loads the user's raw JSON data, counts record entries per feature per
	day, fills in zero-count entries for days without records, draws one
	plot of the daily counts, and returns the per-feature counters.

	Args:
		user_id: identifier accepted by DataExtractor.load_json_data.

	Returns:
		dict mapping feature name -> OrderedDict of 'YYYY-MM-DD' -> count,
		each ordered by increasing date.
	"""
	# One counter dictionary (date string -> count) per tracked feature.
	features_counter_per_day = {'activityRecognitionResult_activity' : {},
	'androidActivityRecognitionResult_activity' : {},
	'appLaunch' : {},
	'battery_health' : {},
	'bluetooth' : {},
	'notifications' : {},
	'headsetPlug' : {},
	'location' : {},
	'networkInfo_state' : {},
	'telephony' : {},
	'wifiAps' : {},
	'wifiConnectedAp' : {},
	'sensor' : {}
	}
	data = DataExtractor.load_json_data(user_id)

	# Sentinels chosen outside the expected recording period so that any
	# real record date replaces them.
	first_record_date = datetime.date(2020,1,1)
	last_record_date = datetime.date(1970,1,1)
	for record_id in data:
		current_date = DataExtractor.date_of_record(data[record_id])
		current_date_str = current_date.strftime('%Y-%m-%d')

		for key, value in data[record_id].iteritems():
			# used to compute the first and the last record
			if current_date > last_record_date:
				last_record_date = current_date
			if current_date < first_record_date:
				first_record_date = current_date

			# get the feature (= key of features_counter_per_day) corresponding to this entry
			feature = DataOperations.give_me_feature(features_counter_per_day.keys(), key, value)

			if feature is not None:
				# the feature corresponding to this entry belongs to the
				# features that we want to record: increase its counter
				# for the current day by 1
				if current_date_str not in features_counter_per_day[feature]:
					features_counter_per_day[feature][current_date_str] = 0
				features_counter_per_day[feature][current_date_str] += 1

	# fill all the missing dates between the first record and the last
	# record for all the features, then sort each counter by increasing date
	for feature, counter_by_day in features_counter_per_day.iteritems():
		filled = DataOperations.fill_in_between_with_zeros(counter_by_day, first_record_date, last_record_date, 'days')
		# BUG FIX: sort the *filled* dictionary. The original sorted the
		# pre-fill counter, silently discarding the zero-filled days.
		features_counter_per_day[feature] = collections.OrderedDict(sorted(filled.items()))

	# construct the plot: one row of y-values per feature
	y_values_matrix = []
	for counter in features_counter_per_day.values():
		y_values_matrix.append(counter.values())

	# One label per 2 weeks
	nb_labels = len(features_counter_per_day['androidActivityRecognitionResult_activity'].keys())/15
	PlotlibDrawer.plot(features_counter_per_day['androidActivityRecognitionResult_activity'].keys(), y_values_matrix, features_counter_per_day.keys(), "days", "number records", "records per day for user "+str(user_id)+"("+str(DataExtractor.user_long_ids[user_id])+")", nb_labels)

	return features_counter_per_day
def features_by_hour_one_user(user_id):
	"""Plot, per hour, the number of records of selected features for a user.

	For each hand-picked focus period of the given user, counts the records
	of each target feature per hour and draws one plot per period.

	Args:
		user_id: key into the focus_periods table and identifier accepted
			by DataExtractor.load_json_data (expected 1..7).
	"""
	# Hand-selected [start, end] datetime windows to inspect, per user.
	# Users mapped to an empty list produce no plot.
	focus_periods = {1: [[datetime.datetime(year=2014, month=10, day=5, hour=0),datetime.datetime(year=2014, month=10, day=16, hour=23)]],
				2: [],
				3: [],
				4: [[datetime.datetime(year=2014, month=12, day=1, hour=0),datetime.datetime(year=2014, month=12, day=21, hour=23)]],
				5: [[datetime.datetime(year=2014, month=11, day=17, hour=0),datetime.datetime(year=2014, month=12, day=17, hour=23)]],
				6: [],
				7: []
				}

	# Features whose hourly counts we want to plot; the remaining known
	# features (battery_health, bluetooth, wifiAps, ...) are deliberately
	# excluded here.
	target_features = ['notifications',
		'sensor',
		'activityRecognitionResult_activity',
		'appLaunch',
		'location'
	]

	data = DataExtractor.load_json_data(user_id)

	for period in focus_periods[user_id]:
		# dictionary: feature -> {hour -> number of records in that hour}
		# for the selected period
		features_per_hour = DataOperations.features_per_hour(period[0], period[1], data, target_features)

		# construct the plot: one row of y-values per feature
		# (loop variable renamed from 'dict', which shadowed the builtin)
		y_values_matrix = []
		for counter in features_per_hour.values():
			y_values_matrix.append(counter.values())

		# One label each 12 hours
		nb_labels = len(features_per_hour[target_features[0]].keys())/12
		PlotlibDrawer.plot(features_per_hour[target_features[0]].keys(), y_values_matrix, features_per_hour.keys(), "hours", "number records", "records per hour for user "+str(user_id)+"("+str(DataExtractor.user_long_ids[user_id])+")", nb_labels)
		
def records_by_day_one_user(file_name, user_name):
    """Plot and return the number of records per day for one user.

    Reads the user's JSON log file, counts records per calendar day
    (inserting zero-count entries for days between records), plots the
    daily counts, and returns them sorted by date.

    Args:
        file_name: path to the user's JSON log file.
        user_name: label used in the plot title.

    Returns:
        collections.OrderedDict mapping 'YYYY-MM-DD' -> record count,
        ordered by increasing date.
    """
    # BUG FIX: the original leaked the file handle (open().read() without
    # close); use a context manager instead.
    with open(file_name) as json_file:
        data = json.loads(json_file.read())

    records_by_day = {}
    previous_date = None
    previous_date_str = ""
    for record in data['logInfo']:
        '''get the time of the first feature of the record. The times of
        the different features of the same record are close to each other,
        so we arbitrarily take the time of the first one'''
        try:
            current_time = data['logInfo'][record].iteritems().next(
            )[1][u'createDate']
        except TypeError:
            #in some cases the first feature is an array of features
            current_time = data['logInfo'][record].iteritems().next(
            )[1][0][u'createDate']

        # createDate is in milliseconds since the epoch
        current_date = datetime.datetime.fromtimestamp((current_time / 1000.0))
        current_date_str = current_date.strftime('%Y-%m-%d')

        #add all the missing days between two records, walking day by day
        #from the previous record's day towards the current one
        while previous_date_str != current_date_str and previous_date is not None:
            if previous_date_str not in records_by_day:
                records_by_day[previous_date_str] = 0

            if previous_date < current_date:
                previous_date += datetime.timedelta(days=1)
                previous_date_str = previous_date.strftime('%Y-%m-%d')
            elif previous_date > current_date:
                previous_date -= datetime.timedelta(days=1)
                previous_date_str = previous_date.strftime('%Y-%m-%d')

        #increase the counter of the corresponding record date by 1 record
        if current_date_str not in records_by_day:
            records_by_day[current_date_str] = 0
        records_by_day[current_date_str] += 1

        previous_date = current_date
        previous_date_str = current_date_str

    #sort the dictionary by increasing dates
    ordered_dict_by_date = collections.OrderedDict(
        sorted(records_by_day.items()))

    #One label per 2 weeks
    nb_labels = len(ordered_dict_by_date.keys()) / 15
    PlotlibDrawer.plot(ordered_dict_by_date.keys(),
                       ordered_dict_by_date.values(), "days", "records number",
                       "records per day for user " + user_name, nb_labels)

    return ordered_dict_by_date
# NOTE(review): this is a duplicate definition of records_by_day_one_user
# (see the identical function above); if both live in the same module the
# second silently replaces the first. Consider removing one of them.
def records_by_day_one_user(file_name, user_name):
	"""Plot and return the number of records per day for one user.

	Reads the user's JSON log file, counts records per calendar day
	(inserting zero-count entries for days between records), plots the
	daily counts, and returns them sorted by date.

	Args:
		file_name: path to the user's JSON log file.
		user_name: label used in the plot title.

	Returns:
		collections.OrderedDict mapping 'YYYY-MM-DD' -> record count,
		ordered by increasing date.
	"""
	# BUG FIX: the original leaked the file handle (open().read() without
	# close); use a context manager instead.
	with open(file_name) as json_file:
		data = json.loads(json_file.read())

	records_by_day = {}
	previous_date = None
	previous_date_str = ""
	for record in data['logInfo']:
		'''get the time of the first feature of the record. The times of
		the different features of the same record are close to each other,
		so we arbitrarily take the time of the first one'''
		try:
			current_time = data['logInfo'][record].iteritems().next()[1][u'createDate']
		except TypeError:
			#in some cases the first feature is an array of features
			current_time = data['logInfo'][record].iteritems().next()[1][0][u'createDate']

		# createDate is in milliseconds since the epoch
		current_date = datetime.datetime.fromtimestamp((current_time/1000.0))
		current_date_str = current_date.strftime('%Y-%m-%d')

		#add all the missing days between two records, walking day by day
		#from the previous record's day towards the current one
		while previous_date_str != current_date_str and previous_date is not None:
			if previous_date_str not in records_by_day:
				records_by_day[previous_date_str] = 0

			if previous_date < current_date:
				previous_date += datetime.timedelta(days=1)
				previous_date_str = previous_date.strftime('%Y-%m-%d')
			elif previous_date > current_date:
				previous_date -= datetime.timedelta(days=1)
				previous_date_str = previous_date.strftime('%Y-%m-%d')

		#increase the counter of the corresponding record date by 1 record
		if current_date_str not in records_by_day:
			records_by_day[current_date_str] = 0
		records_by_day[current_date_str] += 1

		previous_date = current_date
		previous_date_str = current_date_str

	#sort the dictionary by increasing dates
	ordered_dict_by_date = collections.OrderedDict(sorted(records_by_day.items()))

	#One label per 2 weeks
	nb_labels = len(ordered_dict_by_date.keys())/15
	PlotlibDrawer.plot(ordered_dict_by_date.keys(), ordered_dict_by_date.values(), "days", "records number", "records per day for user "+user_name, nb_labels)

	return ordered_dict_by_date
'''
For each user, plots the number of records of selected features per day.

Output: one plot per user.
'''

#!/usr/bin/env python
import sys
import pprint as pp
import os.path

sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from features_by_day_one_user import features_by_day_one_user as fbdou
from data_utils import DataExtractor
from plot_lib_utils import PlotlibDrawer


# Driver: extract and plot the per-day feature counts for every user.
for user_id in DataExtractor.users_ids_list():
	# result variable renamed from 'dict', which shadowed the builtin
	features_per_day = fbdou(user_id)
	print("user "+str(user_id)+" extracted")

PlotlibDrawer.show()
# Example #6
# 0
def compare(reference_transformation, user_id):
	global labels_importance
	global labels_importance_rank
	#global labels_importance_derivative
	index = 0
	transformations = transformation_vectors.keys()
	for label in rows_labels:
		labels_importance[label] = {}
		labels_importance_rank[label] = {}
		for transformation in transformations:
			labels_importance[label][transformation]=transformation_vectors[transformation][0][index]
			labels_importance_rank[label][transformation]= transformation_vectors[transformation][1][index]
			#labels_importance_derivative[label][transformation]= transformation_vectors[transformation][2][index]
		
		index +=1
		
		
	#sort the dictionaries per presence rate. The most frequent feature at the biginning
	labels_importance = collections.OrderedDict(sorted(labels_importance.items(), key=lambda x: x[1][reference_transformation], reverse = True))
	#labels_importance_derivative = collections.OrderedDict(sorted(labels_importance_derivative.items(), key=lambda x: x[1][reference_transformation], reverse = True))
	labels_importance_rank = collections.OrderedDict(sorted(labels_importance_rank.items(), key=lambda x: x[1][reference_transformation]))
	
	
	print JsonUtils.dict_as_json_str(labels_importance)
	
	print JsonUtils.dict_as_json_str(labels_importance_rank)
	#print np.shape(data_matrix)
	
	#write the dictionaries into files
	out = LogsFileWriter.open(file_name)
	LogsFileWriter.write(JsonUtils.dict_as_json_str(labels_importance),out)
	LogsFileWriter.write(JsonUtils.dict_as_json_str(labels_importance_rank),out)
	LogsFileWriter.close(out)
	
	
	#plot the records importance vs different transformation scores
	importances_list = []
	importances_legends = []
	ranks_list = []
	ranks_legends = []
	importances_derivatives_list = []
	importances_derivatives_legends = []
	for transformation in transformations:
		importance_list = [importance[transformation] for importance in labels_importance.values()]
		importances_list.append(importance_list)
		importances_legends.append(transformation)
		
		rank_list = [rank[transformation] for rank in labels_importance_rank.values()]
		ranks_list.append(rank_list)
		ranks_legends.append(transformation)
		
		importance_derivative_list = np.diff(np.asarray(importance_list), 1).tolist()
		importances_derivatives_list.append(importance_derivative_list)
		importances_derivatives_legends.append(transformation)
		
		
	importances_derivatives_list.append([0]*len(importances_derivatives_list[0]))
	importances_derivatives_legends.append("y=0")
	PlotlibDrawer.plot_1(labels_importance.keys(), [percentage["presence_percentage"] for percentage in labels_importance.values()], "features rank", "% records", "presence rate of the features in the records", 10)
	PlotlibDrawer.plot_2(labels_importance.keys(), importances_list, importances_legends, "features rank", "features scores", "comparison of different transformation scores "+str(user_id), 11)
	PlotlibDrawer.plot_2(labels_importance_rank.keys(), ranks_list, ranks_legends, "features initial rank", "features rank after transformation", "comparison of different transformation ranks "+str(user_id), 11)
	PlotlibDrawer.plot_2(labels_importance.keys(), importances_derivatives_list, importances_derivatives_legends, "features initial rank", "features scores derivative", "comparison of different transformation scores derivative "+str(user_id), 11)
	
		
	
	
	
		
	
	
	
#!/usr/bin/env python
import sys
sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from transformations_comparaison_one_user import transformations_comparaison_one_user as tcou
from plot_lib_utils import PlotlibDrawer

from matrix_data_utils import *
'''
run the transformations comparison for all the users
'''
# Driver: run the transformations comparison for every known user,
# then display all the accumulated plots at once.
for uid in MDataExtractor.users_ids_list():
    print("user " + str(uid) + " on working")
    tcou(uid)

    print("user " + str(uid) + " extracted")

PlotlibDrawer.show()