Example No. 1
# Assumed imports: fcn and db are project-local helpers whose module names are
# inferred for this example and may differ in the original project.
import datetime as dt
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import functions as fcn  # assumed name of the project's helper module
import database as db    # assumed name of the project's database wrapper


def current_information():
	timeline_records = fcn.records_count('timeline', None)
	relation_records = fcn.records_count('rating', None)
	deletion_number = fcn.records_count('timeline', 'WHERE __flag__ = 1')
	# Remaining timeline rows, after the current records and the delete
	# markers, are counted as updates.
	update_number = timeline_records - (relation_records + deletion_number)
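	# Hypothetical figures: 1,000 timeline rows, 800 rows in 'rating', and 50
	# delete-flagged rows give 1000 - (800 + 50) = 150 updates.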
	snapshot_list = fcn.fetch_table_list('snapshot')
	relation_size = fcn.table_size('rating')['size']
	timeline_size = fcn.table_size('timeline')['size']
	timeline_duration = fcn.table_duration('timeline')
	
	info_dict = {'timeline_records': timeline_records, 'timeline_size': timeline_size,
		'relation_records': relation_records, 'relation_size': relation_size,
		'number_of_deletes': deletion_number, 'number_of_updates': update_number,
		'snapshot_list': snapshot_list, 'timeline_duration': timeline_duration}

	return info_dict
def create_clusters(query_list, number):
	# Cluster the query timestamps, expressed as seconds from the start of the
	# timeline, into `number` groups.
	query_in_seconds = []
	cluster_info_date = {}
	cluster_info_scalar = {}
	duration = fcn.table_duration('timeline')
	for query in query_list:
		query_in_seconds.append(fcn.calc_sec_difference(duration['min'], query))
	query_list_2D = make_query_list_2D(query_in_seconds)
	# recommended = recommend_no_clusters(10) #elbow method to find the optimal snapshot numbers
	clustered_list, centroids = query_clustering(query_list_2D, number)
	query_clusters_date, snapshots_date, query_clusters_seconds = fetch_clustered_info(query_list_2D, clustered_list, number, duration['min'])
	cluster_info_date['clusters_date'] = query_clusters_date
	cluster_info_date['snapshots_date'] = snapshots_date
	cluster_info_date['base_date'] = duration['min']
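	# cluster_info_scalar maps each centroid (an offset in seconds from the
	# start of the timeline) to the queries, also in seconds, assigned to that
	# cluster; cluster_info_date carries the same grouping as datetimes.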
	for centroid, cluster_queries in zip(centroids, query_clusters_seconds):
		cluster_info_scalar[centroid[0]] = cluster_queries
	return cluster_info_date, cluster_info_scalar
def recommend_no_clusters(max_snapshot): #elbow method in clustering
	cost = []
	query_in_seconds = []
	queries = fcn.read_query_info()
	duration = fcn.table_duration('timeline')
	for query in queries:
		query_in_seconds.append(fcn.calc_sec_difference(duration['min'], query))
	query_list_2D = make_query_list_2D(query_in_seconds)
	for i in range(1, max_snapshot + 1):
		kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
		kmeans.fit(query_list_2D)
		cost.append(kmeans.inertia_)
	fig = plt.figure()
	# Plot cost against the actual candidate cluster counts (1..max_snapshot).
	plt.plot(range(1, max_snapshot + 1), cost)
	plt.xlabel('number of snapshots')
	plt.ylabel('overall cost')
	fig.savefig('static/charts/elbow.png')
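	# Reading the saved chart: the k-means inertia (cost) falls as more
	# snapshots are allowed; the "elbow", where the curve flattens, suggests a
	# good snapshot count.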
def random_query_generator(number):
	fmt = '%Y-%m-%d %H:%M:%S'
	duration = fcn.table_duration('timeline')
	start = duration['min']
	end = duration['max']
	number = int(number) // 10
	second_difference = fcn.calc_sec_difference(start,end)
	query_list = fcn.create_query_clusters(second_difference,10,number)
	random_dates = []
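	# Shift each generated offset (in seconds) from the start of the timeline
	# back into an absolute timestamp.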
	for offset in query_list:
		random_dates.append(start + dt.timedelta(seconds=offset))
	fcn.delete_firebase('queries')
	for i, date in enumerate(random_dates):
		fcn.firebase_writing({'name': 'query {0}'.format(i), 'timestamp': str(date)}, 'queries', '')
	fcn.save_query_info(random_dates)
	return random_dates
def union_snapshot_and_query(temp_snapshot_name, timeline_table, f_value_clause, query_timestamp, materializing_snapshot):
	# Build a temporary table that unions the materialized snapshot with the
	# latest state of every timeline record changed between the snapshot's last
	# timestamp and the query timestamp.
	start_timestamp = fcn.table_duration(materializing_snapshot)['max']
	materialized_attribs = fcn.table_attribs(materializing_snapshot)
	snap_attribs = ",\n".join(str(x) for x in materialized_attribs if x not in ('signer', 'snap_sign'))
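	# In the SQL below, window w orders each rec_id's rows by __t__ descending,
	# so first_value(__flag__) and the {latest_attributes} clause pick the most
	# recent state of each record changed in the queried interval; that result
	# is unioned with the already materialized snapshot.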
	sql = '''
	CREATE TABLE IF NOT EXISTS {temp} AS
	SELECT DISTINCT * FROM (
		SELECT rec_id,
		{latest_attributes},
		max(__t__) OVER w AS __t__,
		first_value(__flag__) OVER w AS flag
		FROM {timeline_table}
		WHERE (__t__ BETWEEN %s AND %s)
		WINDOW w AS (PARTITION BY rec_id ORDER BY __t__ DESC)) T
		UNION ALL
		SELECT {snapshot_attributes} FROM {materialized_snapshot}
	'''.format(temp=temp_snapshot_name,
		timeline_table=timeline_table,
		latest_attributes=f_value_clause,
		snapshot_attributes=snap_attribs,
		materialized_snapshot=materializing_snapshot)
	attributes = [start_timestamp,query_timestamp]
	db.command(sql,attributes)
	db.commit()
def verify_trust():
	duration = fcn.table_duration('timeline')
	status = fcn.verify_trustworthiness(duration['min'], duration['max'])
	print(status)
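

# Minimal usage sketch, assuming the fcn/db helpers are configured and the
# 'timeline' and 'rating' tables are populated; the argument values below are
# illustrative, not taken from the original project.
if __name__ == '__main__':
	print(current_information())
	recommend_no_clusters(10)            # writes static/charts/elbow.png
	dates = random_query_generator(100)  # generate and store random query timestamps
	cluster_info_date, cluster_info_scalar = create_clusters(dates, 5)
	verify_trust()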