Пример #1
0
def features_by_day_one_user(user_id):
    """Count, per day, the records of every tracked feature of one user and plot them.

    The user's data is loaded with ``DataExtractor.load_json_data``. Every
    (key, value) entry of every record is mapped to a tracked feature by
    ``DataOperations.give_me_feature``; entries for which it returns ``None``
    are ignored. Counts are keyed by the record's day formatted as
    ``'%Y-%m-%d'``. Days without any record between the earliest and the
    latest record are then filled in by
    ``DataOperations.fill_in_between_with_zeros`` and the result is plotted
    with ``PlotlibDrawer.plot``.

    Args:
        user_id: identifier handed to ``DataExtractor.load_json_data`` and used
            as key into ``DataExtractor.user_long_ids`` for the plot title.

    Returns:
        dict mapping each tracked feature name to a ``collections.OrderedDict``
        of ``{'YYYY-MM-DD': count}`` sorted by increasing date.
    """
    features_counter_per_day = {
        'activityRecognitionResult_activity': {},
        'androidActivityRecognitionResult_activity': {},
        'appLaunch': {},
        'battery_health': {},
        'bluetooth': {},
        'notifications': {},
        'headsetPlug': {},
        'location': {},
        'networkInfo_state': {},
        'telephony': {},
        'wifiAps': {},
        'wifiConnectedAp': {},
        'sensor': {},
    }
    feature_names = list(features_counter_per_day.keys())
    data = DataExtractor.load_json_data(user_id)

    # The range starts unknown instead of using fixed sentinel dates, so that
    # records of any year (including after 2020) yield the right bounds.
    first_record_date = None
    last_record_date = None
    for record_id in data:
        record = data[record_id]
        current_date = DataExtractor.date_of_record(record)
        current_date_str = current_date.strftime('%Y-%m-%d')

        # The date is the same for every entry of the record, so the range
        # is updated once per record rather than once per entry.
        if first_record_date is None or current_date < first_record_date:
            first_record_date = current_date
        if last_record_date is None or current_date > last_record_date:
            last_record_date = current_date

        for key, value in record.items():
            # feature is the key of features_counter_per_day matching this
            # entry, or None when the entry is not one we want to count
            feature = DataOperations.give_me_feature(feature_names, key, value)
            if feature is not None:
                per_day = features_counter_per_day[feature]
                per_day[current_date_str] = per_day.get(current_date_str, 0) + 1

    # Fill the missing dates between the first and the last record for all
    # the features, then sort each counter by increasing date.
    for feature in feature_names:
        counter_by_day = features_counter_per_day[feature]
        if first_record_date is not None:
            filled = DataOperations.fill_in_between_with_zeros(
                counter_by_day, first_record_date, last_record_date, 'days')
            # NOTE(review): the helper is assumed to return the filled
            # dictionary; if it only fills in place and returns None, the
            # original dictionary (now filled) is sorted instead - confirm.
            if filled is not None:
                counter_by_day = filled
        features_counter_per_day[feature] = collections.OrderedDict(
            sorted(counter_by_day.items()))

    # Build the plot: one series of daily counts per feature.
    y_values_matrix = [list(counter.values())
                       for counter in features_counter_per_day.values()]
    days = list(features_counter_per_day['androidActivityRecognitionResult_activity'].keys())

    # One label per 2 weeks (integer division keeps the label count an int)
    nb_labels = len(days) // 15
    title = "records per day for user "+str(user_id)+"("+str(DataExtractor.user_long_ids[user_id])+")"
    PlotlibDrawer.plot(days, y_values_matrix, list(features_counter_per_day.keys()),
                       "days", "number records", title, nb_labels)

    return features_counter_per_day
def features_by_hour_one_user(user_id):
    """Plot the hourly number of records of the target features for one user.

    For every focus period configured for ``user_id`` the per-hour counts are
    obtained from ``DataOperations.features_per_hour`` and drawn with
    ``PlotlibDrawer.plot``, one plot per period. Users whose period list is
    empty produce no plot. Nothing is returned.

    Args:
        user_id: key into the ``focus_periods`` table below (entries exist
            for 1 to 7); also handed to ``DataExtractor.load_json_data`` and
            used as key into ``DataExtractor.user_long_ids`` for the title.

    Raises:
        KeyError: if ``user_id`` has no entry in ``focus_periods``.
    """
    # Earlier, wider focus periods (disabled, kept for reference):
    # focus_periods = {1: [[datetime.datetime(year=2014, month=8, day=15, hour=0),datetime.datetime(year=2014, month=9, day=17, hour=23)],
    #                      [datetime.datetime(year=2014, month=10, day=1, hour=0),datetime.datetime(year=2014, month=10, day=30, hour=23)]],
    #                  2: [[datetime.datetime(year=2014, month=9, day=1, hour=0),datetime.datetime(year=2014, month=9, day=30, hour=23)],
    #                      [datetime.datetime(year=2014, month=12, day=1, hour=0),datetime.datetime(year=2014, month=12, day=30, hour=23)]],
    #                  3: [[datetime.datetime(year=2014, month=9, day=1, hour=0),datetime.datetime(year=2014, month=10, day=1, hour=23)]],
    #                  4: [[datetime.datetime(year=2014, month=8, day=28, hour=0),datetime.datetime(year=2014, month=10, day=5, hour=23)],
    #                      [datetime.datetime(year=2015, month=1, day=1, hour=0),datetime.datetime(year=2015, month=1, day=15, hour=23)]],
    #                  5: [[datetime.datetime(year=2014, month=9, day=1, hour=0),datetime.datetime(year=2014, month=9, day=30, hour=23)],
    #                      [datetime.datetime(year=2014, month=12, day=10, hour=0),datetime.datetime(year=2015, month=1, day=10, hour=23)]],
    #                  6: [[datetime.datetime(year=2014, month=10, day=17, hour=0),datetime.datetime(year=2014, month=11, day=17, hour=23)]],
    #                  7: [[datetime.datetime(year=2014, month=8, day=27, hour=0),datetime.datetime(year=2014, month=8, day=28, hour=23)]]
    #                  }

    # user id -> list of [start, end] datetimes to plot
    focus_periods = {
        1: [[datetime.datetime(year=2014, month=10, day=5, hour=0),
             datetime.datetime(year=2014, month=10, day=16, hour=23)]],
        2: [],
        3: [],
        4: [[datetime.datetime(year=2014, month=12, day=1, hour=0),
             datetime.datetime(year=2014, month=12, day=21, hour=23)]],
        5: [[datetime.datetime(year=2014, month=11, day=17, hour=0),
             datetime.datetime(year=2014, month=12, day=17, hour=23)]],
        6: [],
        7: [],
    }

    # Features currently left out of the plot:
    # 'androidActivityRecognitionResult_activity', 'battery_health',
    # 'bluetooth', 'headsetPlug', 'networkInfo_state', 'telephony',
    # 'wifiAps', 'wifiConnectedAp'
    target_features = [
        'notifications',
        'sensor',
        'activityRecognitionResult_activity',
        'appLaunch',
        'location',
    ]

    data = DataExtractor.load_json_data(user_id)

    for period_start, period_end in focus_periods[user_id]:
        # dictionary holding, per feature, the number of records per hour
        # in the selected period
        features_per_hour = DataOperations.features_per_hour(
            period_start, period_end, data, target_features)

        # Build the plot: one series of hourly counts per feature.
        y_values_matrix = [list(hourly_counts.values())
                           for hourly_counts in features_per_hour.values()]
        hours = list(features_per_hour[target_features[0]].keys())

        # One label each 12 hours (integer division keeps the count an int)
        nb_labels = len(hours) // 12
        title = "records per hour for user "+str(user_id)+"("+str(DataExtractor.user_long_ids[user_id])+")"
        PlotlibDrawer.plot(hours, y_values_matrix, list(features_per_hour.keys()),
                           "hours", "number records", title, nb_labels)
		
def records_by_day_one_user(file_name, user_name):
    """Count the records of a JSON log file per calendar day and plot them.

    The date of a record is taken from the ``createDate`` of its first
    feature (the features of one record have close timestamps, so the first
    one was chosen arbitrarily). ``createDate`` is divided by 1000 and
    converted with ``datetime.datetime.fromtimestamp``, i.e. to local time.
    Days lying between two consecutive records that have no record
    themselves are inserted with a count of 0.

    Args:
        file_name: path of a JSON file whose top-level ``'logInfo'`` object
            maps record ids to records.
        user_name: string appended to the plot title.

    Returns:
        collections.OrderedDict mapping ``'YYYY-MM-DD'`` to the number of
        records of that day, sorted by increasing date.
    """
    # NOTE(review): this file contains a second definition of
    # records_by_day_one_user; when both are in the same module the later
    # definition is the one callers get.

    # The context manager closes the file once it has been parsed.
    with open(file_name) as json_file:
        data = json.load(json_file)

    one_day = datetime.timedelta(days=1)
    records_by_day = {}
    previous_day = None
    for record in data['logInfo'].values():
        first_feature = next(iter(record.values()))
        try:
            current_time = first_feature[u'createDate']
        except TypeError:
            # in some cases the first feature is an array of features
            current_time = first_feature[0][u'createDate']

        current_day = datetime.datetime.fromtimestamp(current_time / 1000.0).date()
        current_day_str = current_day.strftime('%Y-%m-%d')

        # Add all the missing days between two consecutive records; records
        # may go backwards in time, so walk in whichever direction is needed.
        if previous_day is not None and previous_day != current_day:
            step = one_day if previous_day < current_day else -one_day
            gap_day = previous_day + step
            while gap_day != current_day:
                records_by_day.setdefault(gap_day.strftime('%Y-%m-%d'), 0)
                gap_day += step

        # increase the counter of the corresponding record date by 1 record
        records_by_day[current_day_str] = records_by_day.get(current_day_str, 0) + 1
        previous_day = current_day

    # sort the dictionary by increasing dates
    ordered_dict_by_date = collections.OrderedDict(sorted(records_by_day.items()))

    days = list(ordered_dict_by_date.keys())
    counts = list(ordered_dict_by_date.values())

    # One label per 2 weeks (integer division keeps the label count an int)
    nb_labels = len(days) // 15
    # NOTE(review): the other callers in this file pass an extra series-names
    # argument in third position - confirm this 6-argument call matches the
    # signature of PlotlibDrawer.plot.
    PlotlibDrawer.plot(days, counts, "days", "records number",
                       "records per day for user " + user_name, nb_labels)

    return ordered_dict_by_date
def records_by_day_one_user(file_name, user_name):
    """Count the records of a JSON log file per calendar day and plot them.

    The date of a record is taken from the ``createDate`` of its first
    feature (the features of one record have close timestamps, so the first
    one was chosen arbitrarily). ``createDate`` is divided by 1000 and
    converted with ``datetime.datetime.fromtimestamp``, i.e. to local time.
    Days lying between two consecutive records that have no record
    themselves are inserted with a count of 0.

    Args:
        file_name: path of a JSON file whose top-level ``'logInfo'`` object
            maps record ids to records.
        user_name: string appended to the plot title.

    Returns:
        collections.OrderedDict mapping ``'YYYY-MM-DD'`` to the number of
        records of that day, sorted by increasing date.
    """
    # NOTE(review): an earlier definition of records_by_day_one_user exists
    # above in this file; when both are in the same module this later
    # definition replaces it.

    # The context manager closes the file once it has been parsed.
    with open(file_name) as json_file:
        data = json.load(json_file)

    one_day = datetime.timedelta(days=1)
    records_by_day = {}
    previous_day = None
    for record in data['logInfo'].values():
        first_feature = next(iter(record.values()))
        try:
            current_time = first_feature[u'createDate']
        except TypeError:
            # in some cases the first feature is an array of features
            current_time = first_feature[0][u'createDate']

        current_day = datetime.datetime.fromtimestamp(current_time / 1000.0).date()
        current_day_str = current_day.strftime('%Y-%m-%d')

        # Add all the missing days between two consecutive records; records
        # may go backwards in time, so walk in whichever direction is needed.
        if previous_day is not None and previous_day != current_day:
            step = one_day if previous_day < current_day else -one_day
            gap_day = previous_day + step
            while gap_day != current_day:
                records_by_day.setdefault(gap_day.strftime('%Y-%m-%d'), 0)
                gap_day += step

        # increase the counter of the corresponding record date by 1 record
        records_by_day[current_day_str] = records_by_day.get(current_day_str, 0) + 1
        previous_day = current_day

    # sort the dictionary by increasing dates
    ordered_dict_by_date = collections.OrderedDict(sorted(records_by_day.items()))

    days = list(ordered_dict_by_date.keys())
    counts = list(ordered_dict_by_date.values())

    # One label per 2 weeks (integer division keeps the label count an int)
    nb_labels = len(days) // 15
    # NOTE(review): the other callers in this file pass an extra series-names
    # argument in third position - confirm this 6-argument call matches the
    # signature of PlotlibDrawer.plot.
    PlotlibDrawer.plot(days, counts, "days", "records number",
                       "records per day for user " + user_name, nb_labels)

    return ordered_dict_by_date