def post(self):
    """Import CSV data: create a DataPoint per (timestamp, text) row.

    Reads 'user_email', 'query_id' and 'csv_data' from the request, then
    redirects to /data.
    """
    user = self.get_user()
    if not user:
        return
    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')
    csv_data = self.request.get('csv_data')
    # NOTE(review): this clobbers the authenticated `user` above --
    # presumably an import on behalf of another user; confirm intent.
    user = User.get_by_email(user_email)
    query = Query.get_by_id(query_id)
    for duple in self.parse_csv_data(csv_data):
        # BUG FIX: dropped the leftover "# for testing" debug write that
        # echoed every row into the response body before the redirect.
        dp = DataPoint(
            timestamp=duple[0],
            query=query,
            text=duple[1])
        dp.lt_put()
    self.redirect('/data')
def post(self):
    """Store one submitted datapoint for a query, stamped with 'now'."""
    user = self.get_user()
    if not user:
        return
    query_id = self.request.get('query_id')
    data = self.request.get('data')
    time = self.request.get('time')
    # look up the query and attach the new datapoint
    query = db.get(query_id)  # hmmm
    datapoint = DataPoint(
        text=data,
        query=query,
        timestamp=datetime.now(),
    )
    datapoint.lt_put()
    ActionLog.log('NewDatapoint', query.user, query.name)
    self.response.out.write('Got it!')
def post(self):
    """Handle an inbound SMS: parse query/value pairs and store datapoints."""
    ActionLog.log('ReceivedSMS')
    sender_phone = self.request.get('From')
    body = self.request.get('Body')
    # normalize the phone number to 10 digits
    # '+12268681112' => '2268681112'
    sender_phone = sender_phone[len(sender_phone) - 10:]
    logging.info('Got text from ' + sender_phone)
    # get the user
    user = User.get_by_phone(sender_phone)
    if not user:
        logging.error("Couldn't get the user for phone: " + sender_phone)
        return
    # parse the response
    query_value_pairs = self.parse_body(body)
    for query_name in query_value_pairs:
        value = query_value_pairs[query_name]
        if query_name == '' or value == '':
            logging.error('Got a bad response')
            return
        query = Query.get_by_user_and_name(user, query_name)
        if not query:
            # BUG FIX: the original logged `query.name` here, but `query`
            # is None in this branch (AttributeError); log the parsed name.
            logging.error("Couldn't get query for user " + user.email
                          + ' and query ' + query_name)
            continue
        timestamp = datetime.now()
        dp = DataPoint(
            user=user,
            query=query,
            text=value,
            timestamp=timestamp,
        )
        dp.lt_put()
        ActionLog.log('NewDatapoint', user, query.name)
        query.refresh()
        logging.info('Received datapoint ' + query_name + ': ' + value + '\n')
    self.response.out.write('<Response><Sms>Got it!</Sms></Response>')
def receive(self, mail_message):
    """Create a DataPoint from an inbound email reply.

    The user comes from the sender address, the query from the subject
    ("Re: <query name>"), and the value from the text before the first
    '<' in the HTML body.  Also routes admin broadcast and feedback mail.
    """
    # extract the address from e.g. 'Name <a@b.com>'
    user_email = mail_message.sender[
        mail_message.sender.find('<') + 1:mail_message.sender.rfind('>')]
    user = User.get_by_email(user_email)
    if is_admin(user) and mail_message.to.find('users@') != -1:
        forward_to_users(mail_message)
        return
    if mail_message.to.find('feedback@') != -1:
        forward_to_admins(mail_message)
        return
    # get the datapoint from the body
    data = ''
    # NOTE(review): if the subject has no "Re:", rfind returns -1 and this
    # slices off the first 3 characters -- presumably replies always have it.
    query_name = mail_message.subject[mail_message.subject.rfind("Re:") + 4:]
    query = Query.get_by_user_and_name(user, query_name)
    for content_type, body in mail_message.bodies('text/html'):
        # basically deprecated, since we're having users submit forms
        # straight from the emails now.
        decoded = body.decode()  # decode once instead of three times
        logging.info('Message Body: ' + decoded)
        data = decoded[:decoded.find('<')]
    if data is None:  # defensive; `data` is always a str here
        data = ''
    # get the time from now()
    timestamp = datetime.now()
    log_str = ("Want to create a new datapoint for user %s and with value %s "
               "and query name %s and query %s and timestamp %s"
               % (user_email, data, query_name, query.key(), timestamp))
    logging.info("Received a message from " + mail_message.sender)
    logging.info(log_str)
    # create and put the datapoint
    dp = DataPoint(
        text=data,
        query=query,
        timestamp=timestamp)
    dp.lt_put()
    ActionLog.log('ReceivedEmail')
    ActionLog.log('NewDatapoint', data=query.name, user=user)
    query.refresh()
def correlation_coefficient(aquery, bquery):
    """Pearson correlation coefficient between two integer queries.

    Returns None when the covariance or either standard deviation is
    unavailable, or when either standard deviation is zero (the
    coefficient is undefined for constant-valued data).
    """
    cov = covariance(aquery, bquery)
    adatapoints = DataPoint.get_by_query(aquery)
    bdatapoints = DataPoint.get_by_query(bquery)
    sdA = standard_deviation(adatapoints)
    sdB = standard_deviation(bdatapoints)
    if cov is None or sdA is None or sdB is None:
        return None
    # BUG FIX: guard against ZeroDivisionError for constant-valued queries.
    if sdA == 0 or sdB == 0:
        return None
    return cov / (sdA * sdB)
def refresh_datapoints(self, queries):
    """Push each query's datapoints (serialized as JSON) into memcache.

    Weird thing here: if DataPoint.get_by_query cache-misses, we actually
    refresh twice in a row; on a cache-hit we only refresh once (which is
    what we want).  Maybe this belongs in model.py, but a few extra
    memcache puts per day won't kill us right now.
    """
    for query in queries:
        key_prefix = str(query.key())
        json_dps = DataPoint.JsonFromArray(DataPoint.get_by_query(query))
        memcache.set(
            key=key_prefix + '.datapoints',
            value=json_dps,
        )
        memcache.set(
            key=key_prefix + '.datapoints-last-update',
            value=datetime.now().strftime('%s'),
        )
        logging.info("Refreshed datapoints for metric: " + key_prefix)
def crosssection_suite(query):
    """Build (label, value) analytics slicing `query` against the user's
    other queries: per-integer-value averages and percent changes for
    number queries, and per-common-word averages for text queries."""
    results = []
    user = query.user
    for other in Query.get_by_user(user):
        if other.format == 'number' and other.name != query.name:
            bounds = query_range(other)
            for x in range(bounds[0], bounds[1]):
                results.append((
                    'Average when ' + other.name + ' = ' + str(x),
                    float_str_format(avg_int_on_sliced_int(query, other, x)),
                ))
                results.append((
                    'Change from average when ' + other.name + ' = ' + str(x),
                    float_str_format(
                        percent_from_avg_int_on_sliced_int(query, other, x)),
                ))
        elif other.format == 'text':
            words = common_words(DataPoint.get_by_query(other)).split(', ')
            for word in words:
                results.append((
                    'Average when "' + word + '" is in ' + other.name,
                    float_str_format(
                        avg_int_on_sliced_text(query, other, word)),
                ))
    return results
def analyze_text_query_data(query):
    """Run the basic analytics suite over a text query's datapoints."""
    return list(basic_suite(DataPoint.get_by_query(query)))
def query_to_table_row(self, query):
    """Render one metric as an HTML row using the ui/html/metric.html template."""
    # BUG FIX: use a context manager so the template handle is closed
    # (the original leaked the open file).
    with open('ui/html/metric.html') as template_file:
        metric_html = template_file.read()
    try:
        current_value = DataPoint.get_by_query_most_recent(query)[0].text
    except IndexError:
        # no datapoints recorded for this metric yet
        current_value = 'None'
    metric_data = {
        'query_id': query.key(),
        'name': query.name,
        'text': query.text,
        'format': query.format,
        'frequency': self.frequency_minutes_to_text(query.frequency),
        'lastsentat': query.lastSentAt,
        'freq_minutes': query.frequency,
        'current_value': current_value,
        'overview': self.get_overview(query),
        'ask_when': json.dumps(query.ask_when),
    }
    return metric_html % metric_data
def avg_int_on_sliced_text(aquery, bquery, value):
    """Average of aquery's integer data over days where bquery's text
    datapoint contains `value`."""
    adata = mapize_int_data(DataPoint.get_by_query(aquery))
    bdata = mapize_data(DataPoint.get_by_query(bquery))  # not int data!
    # collapse both sides to one point per day
    adata = bucket_to_days(adata)
    bdata = bucket_to_days(bdata)
    # drop every b-datapoint that isn't `value`
    bdata = text_cross_section(bdata, value)
    adata, bdata = symmettrysize(adata, bdata)
    return map_data_average(adata)
def covariance(int_query_a, int_query_b):
    """Sample covariance of two integer queries, matched up by day.

    cov = sum over all i of (x_i - mean_x)(y_i - mean_y), all over N-1.
    Returns None when fewer than two matched days exist.
    """
    # we need to match up points between the two datasets
    adatapoints = DataPoint.get_by_query(int_query_a)
    bdatapoints = DataPoint.get_by_query(int_query_b)
    adata = mapize_int_data(adatapoints)
    bdata = mapize_int_data(bdatapoints)
    # collapse to a single point for each day; we don't care about the
    # actual times, just index: data
    adata = bucket_to_days(adata)
    bdata = bucket_to_days(bdata)
    # tweak the data such that there is a 1:1 mapping between the sets
    adata, bdata = symmettrysize(adata, bdata)
    # do the actual covariance
    N = len(adata)
    if N <= 1:
        # we divide by N-1 just below
        return None
    aAvg = map_data_average(adata)
    bAvg = map_data_average(bdata)
    # FIX: renamed `sum` (shadowed the builtin) and dropped the old
    # `if N-1 <= 0` branch -- unreachable, since N >= 2 here.
    total = 0.0
    for i in adata.keys():
        total += (adata[i] - aAvg) * (bdata[i] - bAvg)
    return total / (N - 1)
def get(self):
    """Write a query's common-word frequencies as a JSON array."""
    user = self.get_user()
    if not user:
        return
    query = Query.get_by_id(self.request.get('query_id'))
    frequencies = common_word_frequencies(DataPoint.get_by_query(query))
    self.response.out.write('[' + json.dumps(frequencies) + ']')
def avg_int_on_sliced_int(aquery, bquery, value):
    """Average of aquery's integer data over days where bquery equals `value`."""
    adatapoints = DataPoint.get_by_query(aquery)
    bdatapoints = DataPoint.get_by_query(bquery)
    adata = mapize_int_data(adatapoints)
    bdata = mapize_int_data(bdatapoints)
    # collapse both sides to one point per day
    bdata = bucket_to_days(bdata)
    adata = bucket_to_days(adata)
    # throw out all the b-datapoints that aren't `value`
    bdata = integer_cross_section(bdata, value)
    # BUG FIX: the original discarded symmettrysize's return value, so the
    # maps were never restricted to their common days (compare
    # avg_int_on_sliced_text, which assigns the result).
    adata, bdata = symmettrysize(adata, bdata)
    if len(adata) == 0:
        return 0
    # average the surviving a-values
    return map_data_average(adata)
def analyze_integer_query_data(query):
    """Assemble the full analytics list for an integer query: basics,
    daily basics, then the covariance/correlation/cross-section suites."""
    datapoints = DataPoint.get_by_query(query)
    analytics = []
    for suite in (
        basic_suite(datapoints),
        daily_suite(datapoints),
        covariance_suite(query),
        correlation_suite(query),
        crosssection_suite(query),
    ):
        analytics.extend(suite)
    return analytics
def get(self):
    """Write every datapoint for one query as a JSON list of dicts."""
    user = self.get_user()
    if not user:
        return
    user_email = self.request.get('user_email')
    query_id = self.request.get('query_id')
    user = User.get_by_email(user_email)
    query = db.get(query_id)  # hmmm
    datapoints = [dp.to_dict() for dp in DataPoint.get_by_query(query)]
    self.response.out.write(json.dumps(datapoints))
def get(self):
    """Write every datapoint across all of a user's queries as JSON."""
    user = self.get_user()
    if not user:
        return
    user = User.get_by_email(self.request.get('user_email'))
    datapoints = [
        dp.to_dict()
        for query in Query.get_by_user(user)
        for dp in DataPoint.get_by_query(query)
    ]
    self.response.out.write(json.dumps(datapoints))
def get(self):
    """Write a query's datapoints out as CSV."""
    user = self.get_user()
    if not user:
        return
    # NOTE(review): user_email is read but never used -- kept for
    # behavioral parity with the original handler.
    user_email = self.request.get('user_email')
    query = Query.get_by_id(self.request.get('query_id'))
    rows = [self.dp_to_csv(dp) for dp in DataPoint.get_by_query(query)]
    self.response.out.write(''.join(rows))
def query_data_from_db(self, query):
    """Render ui/html/metric_data.html with one table row per datapoint."""
    # BUG FIX: close the template file (the original leaked the handle).
    with open('ui/html/metric_data.html') as template_file:
        query_template = template_file.read()
    # one row per datapoint; join once instead of quadratic string +=
    rows = ''.join(
        self.data_point_to_row(dp) for dp in DataPoint.get_by_query(query))
    params = {
        'rows': rows,
        'name': query.name,
        'query_id': query.key(),
    }
    return query_template % params
def refresh_most_recent_dp(self, queries):
    """Cache each query's most recent datapoint, plus the refresh time,
    in memcache."""
    for query in queries:
        latest = DataPoint.get_by_query_most_recent(query)
        key_prefix = str(query.key())
        memcache.set(
            key=key_prefix + '.most-recent-dp',
            value=latest,
        )
        # NOTE(review): unlike '.most-recent-dp' above, this key has no '.'
        # separator -- kept byte-identical since readers expect this key.
        memcache.set(
            key=key_prefix + 'most-recent-dp-update',
            value=datetime.now().strftime('%s'),
        )
        logging.info(
            'Updated Most Recent Datapoint for metric: ' + key_prefix)
def format_twitter_status(status):
    """Formats tweets into the correct format for delivery to the client.

    Returns None for tweets without geo data; otherwise the DataPoint
    serialized via its json() method.
    """
    # if status['text'].startswith('RT'):
    #     return None
    if not status['geo']:
        return None
    # BUG FIX: removed the leftover debug statements
    # (print "this works" / print "more stuff") from the production path.
    tweet = DataPoint()
    tweet.content = status['text']
    tweet.latitude = status['geo']['coordinates'][0]
    tweet.longitude = status['geo']['coordinates'][1]
    tweet.time = datetime.strptime(status['created_at'],
                                   '%a %b %d %H:%M:%S +0000 %Y')
    tweet.tags = map(lambda x: x['text'], status['entities']['hashtags'])
    return tweet.json()
def post(self):
    """Delete a query together with all of its datapoints."""
    user = self.get_user()
    if not user:
        return
    query = db.get(self.request.get('query_id'))
    if not query:
        self.response.out.write('failure!')
        return
    # remove every datapoint associated with the query first
    for dp in DataPoint.get_by_query(query):
        dp.delete()
    # finally, delete the query itself
    query.delete()
def integer_overview(query):
    """One-line overview for an integer query: its average value."""
    datapoints = DataPoint.get_by_query(query)
    return 'Average: ' + float_str_format(average(datapoints))
def query_range(query):
    """Range (min, max) of a query's datapoint values."""
    return data_range(DataPoint.get_by_query(query))
def query_average(query):
    """Average value across a query's datapoints."""
    return average(DataPoint.get_by_query(query))
def time_overview(query):
    """One-line overview for a time query: its average time."""
    datapoints = DataPoint.get_by_query(query)
    return 'Average Time: ' + str(average_time(datapoints))
from datetime import datetime

from model import DataPoint


def format_twitter_status(status):
    """Formats tweets into the correct format for delivery to the client.

    Returns None for tweets without geo data; otherwise the DataPoint
    serialized via its json() method.
    """
    # BUG FIX: this file contained an unresolved merge conflict
    # (<<<<<<< HEAD ... ======= with a truncated second branch); resolved
    # to the complete HEAD version. The other branch additionally dropped
    # retweets (status['text'].startswith('RT')) and had a broken
    # coordinates'](0) call -- re-add the RT filter deliberately if wanted.
    if not status['geo']:
        return None
    tweet = DataPoint()
    tweet.content = status['text']
    tweet.latitude = status['geo']['coordinates'][0]
    tweet.longitude = status['geo']['coordinates'][1]
    tweet.time = datetime.strptime(status['created_at'],
                                   '%a %b %d %H:%M:%S +0000 %Y')
    tweet.tags = map(lambda x: x['text'], status['entities']['hashtags'])
    return tweet.json()
def get(self):
    """Render the internal engagement dashboard: 24-hour activity counters,
    lifetime totals, and average/worst walltimes for the main pages."""
    user = self.get_user()
    if not user:
        return
    logout_url = users.create_logout_url(self.request.uri)
    yesterday = datetime.now() - timedelta(hours=24)

    def _count(action):
        # number of ActionLog entries for `action` in the last 24 hours
        return ActionLog.get(action=action, timewindow=yesterday).count()

    # hackey high-number for now.
    total_metrics = Query.all().count(100000)
    total_datapoints = DataPoint.all().count(100000)
    # FIX: close the template handle (was leaked) and collapse eight
    # copy-pasted ActionLog.get(...).count() calls into _count().
    with open('intern/html/engagement.html') as f:
        html = f.read()
    params = {
        'new_datapoints': _count('NewDatapoint'),
        'new_metrics': _count('NewMetric'),
        'total_metrics': total_metrics,
        'total_datapoints': total_datapoints,
        'queries_sent': _count('SentQuery'),
        'sms_sent': _count('SentSMS'),
        'emails_sent': _count('SentEmail'),
        'sms_received': _count('ReceivedSMS'),
        'emails_received': _count('ReceivedEmail'),
        'new_logins': _count('FirstTimeLogin'),
        'dashboard_walltime': average_walltime('/dashboard'),
        'data_walltime': average_walltime('/data'),
        'home_walltime': average_walltime('/'),
        'analyze_json_walltime': average_walltime('/analyzeJSON'),
        'analyze_walltime': average_walltime('/analyze'),
        'dashboard_worst_walltime': worst_walltime('/dashboard'),
        'data_worst_walltime': worst_walltime('/data'),
        'home_worst_walltime': worst_walltime('/'),
        'analyze_json_worst_walltime': worst_walltime('/analyzeJSON'),
        'analyze_worst_walltime': worst_walltime('/analyze'),
    }
    self.response.out.write(html % params)
def text_overview(query):
    """One-line overview for a text query: its common words."""
    return 'Common Words: ' + common_words(DataPoint.get_by_query(query))