def process_filter_data(request):
    """
    Extract and validate the 'earliest_utc_ts' / 'latest_utc_ts' POST fields
    of a filter form.

    Defaults: the end timestamp is "now" (UTC) and the start timestamp is
    four hours earlier; either default is used when the corresponding field
    is absent or empty.

    @param request: Django request object carrying the POST vars

    @return: tuple (err_msg, earliest_utc_ts_var, latest_utc_ts_var) where
             err_msg is '' on success and both timestamps are in flat format
    """
    err_msg = ""

    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours=-4)

    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)

    """ PROCESS POST VARS ================= """
    latest_utc_ts_var, msg = _process_timestamp_field(
        request, "latest_utc_ts", _end_time_,
        "Please enter a valid end-timestamp.")
    if msg:
        err_msg = msg

    earliest_utc_ts_var, msg = _process_timestamp_field(
        request, "earliest_utc_ts", _beginning_time_,
        "Please enter a valid start-timestamp.")
    if msg:
        # NOTE: mirrors the original behavior -- a start-field error
        # overwrites any end-field error message.
        err_msg = msg

    return err_msg, earliest_utc_ts_var, latest_utc_ts_var


def _process_timestamp_field(request, field_name, default_ts, invalid_msg):
    """
    Validate a single POST timestamp field, falling back to default_ts.

    @return: tuple (timestamp, error_message); error_message is '' unless
             the field held a non-empty, non-timestamp value
    """
    try:
        ts_var = MySQLdb._mysql.escape_string(request.POST[field_name].strip())

        # BUG FIX: test for the empty string *before* validating.  The
        # original code validated first, so an empty field raised TypeError
        # and emitted a spurious "invalid timestamp" error instead of
        # silently using the default.
        if ts_var == "":
            return default_ts, ""

        if not (TP.is_timestamp(ts_var, 1)) and not (TP.is_timestamp(ts_var, 2)):
            raise TypeError

        # Normalize FORMAT1 timestamps to the flat format used downstream.
        if TP.getTimestampFormat(ts_var) == TP.TS_FORMAT_FORMAT1:
            ts_var = TP.timestamp_convert_format(
                ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

        return ts_var, ""

    except KeyError:
        # Field not submitted at all -- use the default silently.
        return default_ts, ""
    except TypeError:
        # Field submitted but not a recognizable timestamp.
        return default_ts, invalid_msg
def get_timestamps_with_interval(self, logFileName, interval):
    """
    Compute the [start, end] timestamp pair for a log file, where the end
    is the log's own end timestamp and the start precedes it by `interval`
    minutes.

    @param logFileName: name of the log file to inspect
    @param interval: window length in minutes

    @return: list [start_timestamp, end_timestamp] as format-2 strings
    """
    end_obj = TP.timestamp_to_obj(self.get_timestamps(logFileName)[1], 1)
    start_obj = end_obj - datetime.timedelta(minutes=interval)
    return [TP.timestamp_from_obj(obj, 1, 2) for obj in (start_obj, end_obj)]
def format_query(query_name, sql_stmnt, args, **kwargs):
    """
    Substitute report-specific arguments into the SQL template `sql_stmnt`.

    @param query_name: report name; selects which argument layout to use
    @param sql_stmnt: SQL template with %s-style placeholders
    @param args: positional parameters; by convention args[0] is the start
                 time, args[1] the end time, args[2] the campaign and
                 args[3] a minimum-view count or sampling interval,
                 depending on the report
    @param kwargs: country / min_donation / order_str, unpacked by
                   process_kwargs

    @return: the formatted SQL string, or the literal 'no such table\n'
             when query_name is not recognized

    NOTE(review): the bare '%' arguments re-emit literal percent signs --
    presumably for MySQL DATE_FORMAT patterns inside the templates; confirm
    against the .sql files under projSet.__sql_home__.
    """
    country, min_donation, order_str = process_kwargs(kwargs)

    if cmp(query_name, 'report_campaign_ecomm') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_campaign_logs') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time, start_time, start_time))

    elif cmp(query_name, 'report_campaign_ecomm_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_hr') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%',
                                     start_time, '%', '%', '%', '%', start_time, '%'))

    elif cmp(query_name, 'report_impressions_country') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time))

    elif cmp(query_name, 'report_campaign_logs_by_min') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%',
                                     start_time, '%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_non_US_clicks') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time))

    elif cmp(query_name, 'report_contribution_tracking') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', '%', start_time))

    elif cmp(query_name, 'report_total_amounts_by_hr') == 0:
        start_time = args[0]
        end_time = args[1]
        # ' %H' adds an hour component to the grouping expression.
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time))

    elif cmp(query_name, 'report_total_amounts_by_day') == 0:
        start_time = args[0]
        end_time = args[1]
        # Empty string: no hour component -- daily grouping.
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '', start_time, end_time))

    elif cmp(query_name, 'report_LP_metrics') == 0 or cmp(query_name, 'report_LP_metrics_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
        """ Format the condition for minimum views """
        # -1 is the sentinel for "no minimum"; otherwise build a WHERE clause.
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country, min_views))

    elif cmp(query_name, 'report_banner_metrics') == 0 or cmp(query_name, 'report_bannerLP_metrics') == 0 or cmp(query_name, 'report_total_metrics') == 0 or \
        cmp(query_name, 'report_banner_metrics_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_1S') == 0 or cmp(query_name, 'report_total_metrics_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        min_views = args[3]
        """ Format the condition for minimum views """
        if cmp(str(min_views), '-1') == 0:
            min_views = ' '
        else:
            min_views = 'where lp.views > ' + str(min_views) + ' '
        sql_stmnt = str(sql_stmnt % (start_time, end_time, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, country,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country, min_views))

    elif cmp(query_name, 'report_latest_campaign') == 0:
        start_time = args[0]
        sql_stmnt = str(sql_stmnt % (start_time))

    elif cmp(query_name, 'report_banner_impressions_by_hour') == 0:
        start = args[0]
        end = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start, end))

    elif cmp(query_name, 'report_ecomm_by_amount') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time))

    elif cmp(query_name, 'report_ecomm_by_contact') == 0:
        where_str = args[0]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', where_str))

    elif cmp(query_name, 'report_LP_metrics_minutely') == 0 or cmp(query_name, 'report_LP_metrics_minutely_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time, 1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        # NOTE(review): imp_start_time_obj_str is computed but never passed
        # into the template in this branch (start_time is used instead);
        # the banner-minutely branch below does use it -- confirm intended.
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country, campaign))

    elif cmp(query_name, 'report_banner_metrics_minutely') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely') == 0 or cmp(query_name, 'report_banner_metrics_minutely_1S') == 0 or cmp(query_name, 'report_bannerLP_metrics_minutely_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        """ The start time for the impression portion of the query should be one second less"""
        start_time_obj = TP.timestamp_to_obj(start_time, 1)
        imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
        imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval,
                                     imp_start_time_obj_str, end_time, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country, campaign, ))

    elif cmp(query_name, 'report_campaign_metrics_minutely') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_1S') == 0 or cmp(query_name, 'report_campaign_metrics_minutely_total') == 0 \
        or cmp(query_name, 'report_campaign_metrics_minutely_total_1S') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        interval = args[3]
        sql_stmnt = str(sql_stmnt % (campaign, '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country,
                                     '%', '%', '%', '%', interval, interval,
                                     start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_campaign_totals') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % (start_time, end_time))

    elif cmp(query_name, 'report_campaign_banners') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_lps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_bannerlps') == 0:
        start_time = args[0]
        end_time = args[1]
        utm_campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, utm_campaign))

    elif cmp(query_name, 'report_campaign_metrics_minutely_all') == 0 or cmp(query_name, 'report_banner_metrics_minutely_all') == 0 or cmp(query_name, 'report_lp_metrics_minutely_all') == 0:
        start_time = args[0]
        end_time = args[1]
        # NOTE(review): args[2] (campaign in sibling branches) is skipped
        # here and args[3] is read as the interval -- confirm callers pass
        # a four-element args list for these reports.
        interval = args[3]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time))

    elif cmp(query_name, 'report_donation_metrics') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        sql_stmnt = str(sql_stmnt % (start_time, end_time, campaign, country,
                                     start_time, end_time, campaign, country))

    elif cmp(query_name, 'report_total_donations') == 0:
        start_time = args[0]
        end_time = args[1]
        campaign = args[2]
        """ Recursively construct the sub-query """
        sub_query_name = 'report_donation_metrics'
        sub_query_sql = Hlp.file_to_string(projSet.__sql_home__ + sub_query_name + '.sql')
        sub_query_sql = format_query(sub_query_name, sub_query_sql,
                                     [start_time, end_time, campaign], country=country)
        sql_stmnt = str(sql_stmnt % sub_query_sql)

    elif cmp(query_name, 'report_daily_totals_by_country') == 0:
        start_time = args[0]
        end_time = args[1]
        sql_stmnt = str(sql_stmnt % ('%', '%', '%', start_time, end_time,
                                     country, min_donation, order_str))

    else:
        # Unrecognized report name -- the sentinel string is returned in
        # place of SQL (callers presumably surface it verbatim).
        return 'no such table\n'

    return sql_stmnt
def execute_process(self, key, **kwargs):
    """
    Build and cache per-country fundraiser-total time series comparing the
    2010 and 2011 fundraisers.

    @param key: cache key under which the result dictionary is stored
    @param kwargs: unused here -- accepted for interface compatibility

    Side effects: clears any previously cached entry for `key` and stores a
    dict mapping country name -> combined data lists.
    """
    logging.info('Commencing caching of fundraiser totals data at: %s' % self.CACHING_HOME)

    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

    """ DATA CONFIG """

    """ set the metrics to plot """
    lttdl = DL.LongTermTrendsLoader(db='db1025')

    # Countries ranked by donations since the start of the 2011 fundraiser,
    # plus a synthetic 'Total' bucket.
    start_of_2011_fundraiser = '20111116000000'
    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
    countries.append('Total')

    """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
    year_groups = dict()
    for country in countries:
        if cmp(country, 'Total') == 0:
            year_groups['2011 Total'] = ['2011.*']
            year_groups['2010 Total'] = ['2010.*']
        else:
            # Group labels match '<year><country>' keys in the loaded data.
            year_groups['2011 ' + country] = ['2011' + country]
            year_groups['2010 ' + country] = ['2010' + country]

    metrics = 'amount'
    weights = ''
    groups = year_groups
    group_metrics = ['year', 'country']
    metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_

    include_totals = False
    include_others = False
    hours_back = 0
    time_unit = TP.DAY

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    dr = DR.DataReporting()

    # NOTE: 'run_fundrasing_totals' (sic) is the loader's actual method name.
    times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types,
                                                groups=groups, group_metric=group_metrics,
                                                include_other=include_others,
                                                include_total=include_totals, hours_back=hours_back,
                                                weight_name=weights, time_unit=time_unit)
    dict_param = dict()

    # Slice out each country's 2010/2011 series and fold them into one
    # combined structure per country.
    for country in countries:
        key_2011 = '2011 ' + country
        key_2010 = '2010 ' + country

        new_counts = dict()
        new_counts[key_2010] = counts[key_2010]
        new_counts[key_2011] = counts[key_2011]

        new_times = dict()
        new_times[key_2010] = times[key_2010]
        new_times[key_2011] = times[key_2011]

        dr._counts_ = new_counts
        dr._times_ = new_times

        # NOTE(review): dict.keys()[0] is Python 2 only (keys() is a view in
        # Python 3) and picks an arbitrary series -- both series presumably
        # share one length; confirm if this module is ever ported.
        empty_data = [0] * len(new_times[new_times.keys()[0]])
        data = list()
        data.append(dr.get_data_lists([''], empty_data))

        dict_param[country] = Hlp.combine_data_lists(data)

    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
def execute_process(self, key, **kwargs):
    """
    Build and cache the live-results summary: confidence metrics, summary
    query rows (optionally merged with one-step-form artifacts), legend
    tables and minutely donation series for campaigns, banners and landing
    pages.

    @param key: shelve/cache key under which the assembled dict is stored
    @param kwargs: unused here -- accepted for interface compatibility
    """
    logging.info('Commencing caching of live results data at: %s' % self.CACHING_HOME)

    shelve_key = key

    """ Find the earliest and latest page views for a given campaign """
    lptl = DL.LandingPageTableLoader(db='db1025')

    query_name = 'report_summary_results_country.sql'
    query_name_1S = 'report_summary_results_country_1S.sql'
    campaign_regexp_filter = '^C_|^C11_'

    dl = DL.DataLoader(db='db1025')
    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1,
                                                      hours=-self.DURATION_HRS)

    """ Should a one-step query be used? """
    use_one_step = lptl.is_one_step(start_time, end_time, 'C11')  # Assume it is a one step test if there are no impressions for this campaign in the landing page table

    """
    Retrieve the latest time for which impressions have been loaded
    ===============================================================
    """
    sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

    results = dl.execute_SQL(sql_stmnt)
    latest_timestamp = results[0][0]
    latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
    latest_timestamp_flat = TP.timestamp_convert_format(latest_timestamp, 2, 1)

    ret = DR.ConfidenceReporting(query_type='', hyp_test='', db='db1025').get_confidence_on_time_range(
        start_time, end_time, campaign_regexp_filter, one_step=use_one_step)
    measured_metrics_counts = ret[1]

    """ Prepare Summary results """
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)
    # Argument layout is dictated by the .sql template -- do not reorder.
    sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat,
                             campaign_regexp_filter, start_time, latest_timestamp_flat,
                             start_time, end_time, campaign_regexp_filter,
                             start_time, end_time, campaign_regexp_filter,
                             start_time, end_time, campaign_regexp_filter,
                             start_time, latest_timestamp_flat, campaign_regexp_filter,
                             start_time, latest_timestamp_flat, campaign_regexp_filter)

    logging.info('Executing report_summary_results ...')

    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    if use_one_step:

        logging.info('... including one step artifacts ...')

        sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ + query_name_1S)
        sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat,
                                       campaign_regexp_filter, start_time, latest_timestamp_flat,
                                       start_time, end_time, campaign_regexp_filter,
                                       start_time, end_time, campaign_regexp_filter,
                                       start_time, end_time, campaign_regexp_filter,
                                       start_time, latest_timestamp_flat, campaign_regexp_filter,
                                       start_time, latest_timestamp_flat, campaign_regexp_filter)

        results = list(results)
        results_1S = dl.execute_SQL(sql_stmnt_1S)

        """ Ensure that the results are unique """
        # Rows are keyed by their first three columns; one-step rows take
        # precedence over two-step rows with the same key.
        one_step_keys = list()
        for row in results_1S:
            one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

        new_results = list()
        for row in results:
            # NOTE: `key` here shadows the `key` parameter; the parameter
            # value was saved in shelve_key above.
            key = str(row[0]) + str(row[1]) + str(row[2])
            if not (key in one_step_keys):
                new_results.append(row)

        results = new_results
        results.extend(list(results_1S))

    metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
    conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp',
                                               hyp_test='TTest').get_confidence_legend_table()

    """ Create a interval loader objects """
    sampling_interval = 5  # 5 minute sampling interval for donation plots

    ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_,
                                    generate_plot=False, db='db1025')
    ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_,
                                     generate_plot=False, db='db1025')
    ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_,
                                 generate_plot=False, db='db1025')

    """ Execute queries """
    ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '', {})
    ir_banner.run(start_time, end_time, sampling_interval, 'donations', '', {})
    ir_lp.run(start_time, end_time, sampling_interval, 'donations', '', {})

    """ Prepare serialized objects """
    dict_param = dict()

    dict_param['metric_legend_table'] = metric_legend_table
    dict_param['conf_legend_table'] = conf_legend_table

    dict_param['measured_metrics_counts'] = measured_metrics_counts
    dict_param['results'] = results
    dict_param['column_names'] = column_names

    dict_param['interval'] = sampling_interval
    dict_param['duration'] = self.DURATION_HRS

    dict_param['start_time'] = TP.timestamp_convert_format(start_time, 1, 2)
    dict_param['end_time'] = TP.timestamp_convert_format(end_time, 1, 2)

    dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
    dict_param['ir_banner_counts'] = ir_banner._counts_
    dict_param['ir_lp_counts'] = ir_lp._counts_

    dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
    dict_param['ir_banner_times'] = ir_banner._times_
    dict_param['ir_lp_times'] = ir_lp._times_

    self.clear_cached_data(shelve_key)
    self.cache_data(dict_param, shelve_key)

    logging.info('Caching complete.')
def index(request, **kwargs):
    """
    Render the campaign index page: campaigns active over the requested
    window (default: the last 24 hours), sorted by earliest access and
    filtered by a minimum donation count.

    @param request: Django request object; optional POST vars err_msg,
                    min_donations and utc_ts (flat or FORMAT1 timestamp)
    @param kwargs: may carry a nested kwargs['kwargs']['err_msg'] from a
                   redirecting view

    @return: rendered 'campaigns/index.html' response
    """
    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True

    """ Determine the start and end times for the query """
    start_time_obj = datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)
    start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)

    """ PROCESS POST KWARGS =================== """

    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        # FIX: was a bare `except: pass` -- narrow to the lookup failures
        # that can legitimately occur so real bugs are no longer swallowed.
        pass

    """ PROCESS POST VARS ================= """

    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass

    """ If the filter form was submitted extract the POST vars """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['utc_ts'].strip())

        """ If the user timestamp is earlier than the default start time run the query for the earlier start time """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)

        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var,
                                                     TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        else:
            raise Exception()

        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)

    except KeyError:
        # In the case the form was not submitted set minimum donations and
        # retain the default start time.
        min_donations_var = -1
    except Exception:
        # In the case the form was incorrectly formatted notify the user.
        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj, 1, 3)
        err_msg = 'Filter fields are incorrect.'

    """ GENERATE CAMPAIGN DATA ====================== """
    campaigns, all_data = crl.run_query({
        'metric_name': 'earliest_timestamp',
        'start_time': start_time,
        'end_time': end_time
    })

    """ Sort campaigns by earliest access """
    sorted_campaigns = sorted(campaigns.iteritems(), key=operator.itemgetter(1))
    sorted_campaigns.reverse()

    """ FILTER CAMPAIGN DATA ==================== """
    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]
        if campaign[1] > 0:
            name = all_data[key][0]
            if name is None:  # FIX: identity comparison for None
                name = 'none'
            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)
            if filter_data:
                # Keep only campaigns exceeding the donation threshold.
                if all_data[key][2] > min_donations_var:
                    new_sorted_campaigns.append([
                        campaign[0], campaign[1], name, timestamp,
                        all_data[key][2], all_data[key][4]
                    ])
            else:
                new_sorted_campaigns.append([
                    campaign[0], campaign[1], name, timestamp,
                    all_data[key][2], all_data[key][4]
                ])

    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {
        'campaigns': sorted_campaigns,
        'err_msg': err_msg
    }, context_instance=RequestContext(request))
def mine_squid_landing_page_requests(self, logFileName):
    """
    Mine landing-page view records from a squid log file and load them into
    the landing_page_requests table, recording mining progress in the
    squid_log_record table.

    @param logFileName: name of the squid log file to process
    """
    logging.info("Begin mining of landing page requests in %s" % logFileName)

    """ Create the dataloaders and initialize """
    sltl = DL.SquidLogTableLoader()
    lptl = DL.LandingPageTableLoader()
    ipctl = DL.IPCountryTableLoader()

    """ Retrieve the log timestamp from the filename """
    time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)
    end = time_stamps[1]
    curr_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

    """ retrieve the start time of the log """
    start = self.get_first_timestamp_from_log(logFileName)

    """ Initialization - open the file """
    logFile, total_lines_in_file = self.open_logfile(logFileName)

    # Indices into the 6-tuple returned by urlparse.
    hostIndex = 1
    queryIndex = 4
    pathIndex = 2

    """ Clear the old records """
    self._clear_squid_records(start, self._LP_REQUEST_)

    """ Add a row to the SquidLogTable """
    sltl.insert_row(
        type="lp_view",
        log_copy_time=curr_time,
        start_time=start,
        end_time=end,
        log_completion_pct="0.0",
        total_rows="0",
    )

    line_count = 0
    requests_loaded = 0

    """ Extract the mining patterns from the DB """
    mptl = DL.MiningPatternsTableLoader()
    lp_patterns = mptl.get_pattern_lists()[1]

    """
    PROCESS REQUESTS FROM FILE
    ==========================

    Sample request:

    line = "sq63.wikimedia.org 757671483 2011-06-01T23:00:01.343 93 98.230.113.246 TCP_MISS/200 10201 GET \
    http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg CARP/208.80.152.83 text/html http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg \
    - Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)"
    """
    line = logFile.readline()
    while line != "":

        lineArgs = line.split()

        """ Get the IP Address of the donor """
        ip_add = lineArgs[4]

        """
        Parse the Timestamp:

        Sample timestamp:
            timestamp = "2011-06-01T23:00:07.612"
        """
        date_and_time = lineArgs[2]

        date_string = date_and_time.split("-")
        time_string = date_and_time.split(":")

        # if the date is not logged ignore the record
        try:
            year = date_string[0]
            month = date_string[1]
            day = date_string[2][:2]
            hour = time_string[0][-2:]
            # FIX: renamed from `min`, which shadowed the builtin.
            minute = time_string[1]
            sec = time_string[2][:2]
        except IndexError:  # FIX: was a bare except
            line = logFile.readline()
            total_lines_in_file = total_lines_in_file - 1
            continue

        timestamp_string = year + "-" + month + "-" + day + " " + hour + ":" + minute + ":" + sec

        """
        Process referrer URL
        ===================

        Sample referrer:
            referrer_url = http://en.wikipedia.org/wiki/File:Murphy_High_School.jpg
        """
        try:
            referrer_url = lineArgs[11]
        except IndexError:
            referrer_url = "Unavailable"

        parsed_referrer_url = up.urlparse(referrer_url)

        if parsed_referrer_url[hostIndex] is None:
            project = "NONE"
            source_lang = "NONE"
        else:
            hostname = parsed_referrer_url[hostIndex].split(".")

            """ If the hostname of the form '<lang>.<project>.org' """
            if len(hostname[0]) <= 2:
                # referrer_path = parsed_referrer_url[pathIndex].split('/')
                project = hostname[0]  # wikimediafoundation.org
                source_lang = hostname[0]
            else:
                try:
                    """ species.wikimedia vs en.wikinews """
                    project = hostname[0] if (hostname[1] == "wikimedia") else hostname[1]
                    """ pl.wikipedia vs commons.wikimedia """
                    source_lang = hostname[0] if (len(hostname[1]) < 5) else "en"
                except IndexError:  # FIX: was a bare except
                    project = "wikipedia"  # default project to 'wikipedia'
                    source_lang = "en"  # default lang to english

        """
        Process User agent string
        ========================

        sample user agent string:
            user_agent_string = Mozilla/4.0%20(compatible;%20MSIE%208.0;%20Windows%20NT%206.1;%20WOW64;%20Trident/4.0;%20FunWebProducts;%20GTB6.6;%20SLCC2;%20.NET%20CLR%202.0.50727;%20.NET%20CLR%203.5.30729;%20.NET%20CLR%203.0.30729;%20Media%20Center%20PC%206.0;%20HPDTDF;%20.NET4.0C)
        """
        try:
            user_agent_string = lineArgs[13]
        except IndexError:
            user_agent_string = ""

        try:
            user_agent_fields = httpagentparser.detect(user_agent_string)
            browser = "NONE"

            # Check to make sure fields exist
            if len(user_agent_fields["browser"]) != 0:
                if len(user_agent_fields["browser"]["name"]) != 0:
                    browser = user_agent_fields["browser"]["name"]
        except Exception:
            logging.error("Could not process user agent string.")
            browser = "NONE"

        """
        Process landing URL
        ===================

        sample landing urls:

            landing_url = "http://wikimediafoundation.org/w/index.php?title=WMFJA085/en/US&utm_source=donate&utm_medium=sidebar&utm_campaign=20101204SB002&country_code=US&referrer=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FFile%3AMurphy_High_School.jpg"
            landing_url = "http://wikimediafoundation.org/wiki/WMFJA1/ru"
            landing_url = *donate.wikimedia.org/wiki/Special:FundraiserLandingPage?uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107
        """
        try:
            landing_url = lineArgs[8]
        except IndexError:
            landing_url = "Unavailable"

        # (the urlparse indices were already initialized above; the original
        # re-assigned them on every iteration)
        parsed_landing_url = up.urlparse(landing_url)
        query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang
        path_pieces = parsed_landing_url[pathIndex].split("/")

        include_request, url_match = self.evaluate_landing_url(
            landing_url, parsed_landing_url, query_fields, path_pieces, lp_patterns
        )

        if include_request:

            """ Extract the language from the query string -- the language
            has already been read from the url path but if it exists in the
            query string this setting should take precedence """
            try:
                source_lang = query_fields["language"][0]
            except (KeyError, IndexError):  # FIX: was a bare except
                pass

            # http://wikimediafoundation.org/wiki/
            if url_match == 1:
                """ Address cases where the query string does not contain the landing page - ...wikimediafoundation.org/wiki/... """
                parsed_landing_url = up.urlparse(landing_url)
                query_fields = cgi.parse_qs(parsed_landing_url[queryIndex])  # Get the banner name and lang

                landing_path = parsed_landing_url[pathIndex].split("/")
                landing_page = landing_path[2]

                # URLs of the form ...?county_code=<iso_code>
                try:
                    country = query_fields["country"][0]
                # URLs of the form ...<path>/<lp_name>/<lang>/<iso_code>
                except (KeyError, IndexError):
                    try:
                        if len(landing_path) == 5:
                            country = landing_path[4]
                            # source_lang = landing_path[3]
                        else:
                            country = landing_path[3]
                    except IndexError:
                        logging.info("Could not parse country from landing path: %s", landing_url)
                        line = logFile.readline()
                        total_lines_in_file = total_lines_in_file - 1
                        continue

            # http://wikimediafoundation.org/w/index.php?
            elif url_match == 2:
                try:
                    """ URLs of the form ...?title=<lp_name> """
                    lp_country = query_fields["title"][0].split("/")
                    landing_page = lp_country[0]

                    """ URLs of the form ...?county_code=<iso_code> """
                    try:
                        country = query_fields["country"][0]
                    except (KeyError, IndexError):
                        """ URLs of the form ...?title=<lp_name>/<lang>/<iso_code> """
                        if len(lp_country) == 3:
                            country = lp_country[2]
                        else:
                            country = lp_country[1]
                except (KeyError, IndexError):
                    logging.info("Could not parse landing page request from query string: %s", landing_url)
                    line = logFile.readline()
                    total_lines_in_file = total_lines_in_file - 1
                    continue

            # donate.wikimedia.org/wiki/Special:FundraiserLandingPage?
            elif url_match == 3:
                try:
                    # e.g. uselang=en&country=US&template=Lp-layout-default&appeal=Appeal-default&form-countryspecific=Form-countryspecific-control&utm_medium=sitenotice&utm_source=B11_Donate_Jimmy_Control&utm_campaign=C11_1107
                    source_lang = query_fields["uselang"][0]
                    country = query_fields["country"][0]
                    # Landing page id is assembled from the template/appeal/
                    # form query parameters.
                    landing_page = (
                        query_fields["template"][0].split("-")[2]
                        + "~"
                        + query_fields["appeal-template"][0].split("-")[2]
                        + "~"
                        + query_fields["appeal"][0].split("-")[1]
                        + "~"
                        + query_fields["form-template"][0].split("-")[2]
                        + "~"
                        + query_fields["form-countryspecific"][0].split("-")[2]
                    )
                    utm_source = query_fields["utm_source"][0]
                    utm_campaign = query_fields["utm_campaign"][0] + "_" + country
                    utm_medium = query_fields["utm_medium"][0]
                except Exception as inst:
                    # logging.info(inst)    # __str__ allows args to printed directly
                    # logging.info('Could not parse landing page request from query string: %s', landing_url)
                    line = logFile.readline()
                    total_lines_in_file = total_lines_in_file - 1
                    continue

            """ If country is confused with the language use the ip """
            if country == country.lower():
                # logging.info('Using geo-locator to set ip-address: %s', landing_url)
                country = ipctl.localize_IP(ip_add)

            """ Ensure fields providing request ID exist """
            try:
                utm_source = query_fields["utm_source"][0]
                utm_campaign = query_fields["utm_campaign"][0]
                utm_medium = query_fields["utm_medium"][0]
            except KeyError:
                line = logFile.readline()
                total_lines_in_file = total_lines_in_file - 1
                continue

            """ Insert record into the landing_page_requests table """
            lptl.insert_row(
                utm_source_arg=utm_source,
                utm_campaign_arg=utm_campaign,
                utm_medium_arg=utm_medium,
                landing_page_arg=landing_page,
                page_url_arg=landing_url,
                referrer_url_arg=referrer_url,
                browser_arg=browser,
                lang_arg=source_lang,
                country_arg=country,
                project_arg=project,
                ip_arg=ip_add,
                start_timestamp_arg=start,
                timestamp_arg=timestamp_string,
            )
            requests_loaded = requests_loaded + 1

        line = logFile.readline()
        line_count = line_count + 1

        """ Log Miner Logging - Update the squid_log_record table """
        if (line_count % 1000) == 0 or line_count == total_lines_in_file:
            # BUG FIX: cast before dividing.  Under Python 2,
            # float(line_count / total_lines_in_file) performed integer
            # division first, so the completion pct was 0 until the very end.
            completion = float(line_count) / float(total_lines_in_file) * 100.0
            sltl.update_table_row(
                type="lp_view",
                log_copy_time=curr_time,
                start_time=start,
                end_time=end,
                log_completion_pct=completion.__str__(),
                total_rows=line_count.__str__(),
            )

    # FIX: close the log file when done (the impression miner already does).
    logFile.close()
def mine_squid_impression_requests(self, logFileName):
    """
    Mine banner-impression records from a squid log file, aggregate counts
    by (banner, country, project, language, minute) and load them into the
    impression table, recording progress in the squid_log_record table.

    @param logFileName: name of the squid log file to process
    """
    logging.info("Begin mining of banner impressions in %s" % logFileName)

    sltl = DL.SquidLogTableLoader()
    itl = DL.ImpressionTableLoader()

    """ Retrieve the log timestamp from the filename """
    time_stamps = self.get_timestamps_with_interval(logFileName, self._log_copy_interval_)

    """ retrieve the start time of the log """
    start = self.get_first_timestamp_from_log(logFileName)

    end = time_stamps[1]
    # NOTE(review): this uses local time (now()) where the landing-page
    # miner uses utcnow() -- looks inconsistent; confirm intended.
    curr_time = TP.timestamp_from_obj(datetime.datetime.now(), 1, 3)

    """ Initialization - open the file """
    logFile, total_lines_in_file = self.open_logfile(logFileName)

    queryIndex = 4  # index of the query string in the urlparse 6-tuple

    counts = Hlp.AutoVivification()
    # insertStmt = 'INSERT INTO ' + self._impression_table_name_ + self._BANNER_FIELDS_ + ' values '

    """ Clear the old records """
    self._clear_squid_records(start, self._BANNER_REQUEST_)

    """ Add a row to the SquidLogTable """
    sltl.insert_row(
        type="banner_impression",
        log_copy_time=curr_time,
        start_time=start,
        end_time=end,
        log_completion_pct="0.0",
        total_rows="0",
    )

    """
    PROCESS LOG FILE
    ================

    Sample Request:

    line = "sq63.wikimedia.org 757675855 2011-06-01T23:00:07.612 0 187.57.227.121 TCP_MEM_HIT/200 1790 GET \
    http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR NONE/- text/javascript http://pt.wikipedia.org/wiki/Modo_e_tempo_verbal \
    - Mozilla/5.0%20(Windows%20NT%206.1)%20AppleWebKit/534.24%20(KHTML,%20like%20Gecko)%20Chrome/11.0.696.71%20Safari/534.24"
    """
    line_count = 0

    line = logFile.readline()
    while line != "":

        lineArgs = line.split()

        """
        Parse the Timestamp:

        Sample timestamp:
            timestamp = "2011-06-01T23:00:07.612"
        """
        # Flatten to a minute-resolution timestamp: YYYYMMDDHHMM00.
        try:
            time_stamp = lineArgs[2]
            time_bits = time_stamp.split("T")
            date_fields = time_bits[0].split("-")
            time_fields = time_bits[1].split(":")
            time_stamp = date_fields[0] + date_fields[1] + date_fields[2] + time_fields[0] + time_fields[1] + "00"
        except (ValueError, IndexError):
            # Malformed timestamp -- skip the record.
            line = logFile.readline()
            total_lines_in_file = total_lines_in_file - 1
            continue

        """
        Parse the URL:

        Sample url:
            url = "http://meta.wikimedia.org/w/index.php?title=Special:BannerLoader&banner=B20110601_JWJN001_BR&userlang=pt&db=ptwiki&sitename=Wikip%C3%A9dia&country=BR"
        """
        try:
            url = lineArgs[8]
        except IndexError:
            url = "Unavailable"

        parsedUrl = up.urlparse(url)
        query = parsedUrl[queryIndex]
        queryBits = cgi.parse_qs(query)

        """ Extract - project, banner, language, & country data from the url """
        project = ""
        if "db" in queryBits:  # FIX: `in d` instead of `in d.keys()`
            project = queryBits["db"][0]

        if project == "" and "sitename" in queryBits:
            sitename = queryBits["sitename"][0]
            if sitename:
                project = sitename
            else:
                project = "NONE"

        if "banner" in queryBits:
            banner = queryBits["banner"][0]
        else:
            banner = "NONE"

        if "userlang" in queryBits:
            lang = queryBits["userlang"][0]
        else:
            lang = "NONE"

        if "country" in queryBits:
            country = queryBits["country"][0]
        else:
            country = "NONE"

        """ Group banner impression counts based on (banner, country, project, language) """
        # AutoVivification yields an empty dict for an unseen leaf, so the
        # `+ 1` raises TypeError on first sight of a key combination.
        try:
            counts[banner][country][project][lang][time_stamp] = (
                counts[banner][country][project][lang][time_stamp] + 1
            )
        except TypeError:
            counts[banner][country][project][lang][time_stamp] = 1

        line = logFile.readline()
        line_count = line_count + 1

        """ Log Miner Logging - Update the squid_log_record table """
        if line_count % 10000 == 0 or line_count == total_lines_in_file:
            # BUG FIX: cast before dividing.  Under Python 2,
            # float(line_count / total_lines_in_file) performed integer
            # division first, so the completion pct was 0 until the very end.
            completion = float(line_count) / float(total_lines_in_file) * 100.0
            sltl.update_table_row(
                type="banner_impression",
                log_copy_time=curr_time,
                start_time=start,
                end_time=end,
                log_completion_pct=completion.__str__(),
                total_rows=line_count.__str__(),
            )

    """ ====== FILE COMPLETE ====== """
    logFile.close()

    """ Run through the counts dictionary and insert a row into the banner
    impressions table for each (banner, country, project, lang, minute) entry """
    for banner in counts.keys():
        countryCounts = counts[banner]
        for country in countryCounts.keys():
            projectCounts = countryCounts[country]
            for project in projectCounts.keys():
                langCounts = projectCounts[project]
                for lang in langCounts.keys():
                    timestampCounts = langCounts[lang]
                    for timestamp in timestampCounts.keys():
                        itl.insert_row(
                            utm_source_arg=banner,
                            referrer_arg=project,
                            country_arg=country,
                            lang_arg=lang,
                            counts_arg=str(timestampCounts[timestamp]),
                            on_minute_arg=timestamp,
                            start_timestamp_arg=start,
                        )
def daily_totals(request):
    """
    Render the daily donation totals-by-country report.

    Optional POST filters:
      start_day_ts / end_day_ts -- report window timestamps (default: the
                                   last 24 hours, UTC)
      country                   -- two-letter country code (default: any)
      min_donation              -- minimum donation amount (default 0)
      order_metric              -- 'Date' or 'Country' row ordering

    Returns the rendered 'live_results/daily_totals.html' response; any
    input-format problems are accumulated into err_msg (not shown here --
    preserved from the original contract).
    """
    err_msg = ''

    # Default window: the last 24 hours (UTC).
    start_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'                        # regex fragment: any two-character country code
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'

    """ PROCESS POST """

    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(request.POST['start_day_ts'].strip())
            # Renamed from 'format' to avoid shadowing the builtin.
            # NOTE(review): format codes 2 / -1 follow TP's convention -- confirm against TP.getTimestampFormat.
            ts_format = TP.getTimestampFormat(start_day_ts)

            if ts_format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(request.POST['end_day_ts'].strip())
            # BUG FIX: validate the *end* timestamp -- the original inspected
            # start_day_ts here, so a malformed end_day_ts slipped through
            # unvalidated and unconverted.
            ts_format = TP.getTimestampFormat(end_day_ts)

            if ts_format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:
                min_donation = int(MySQLdb._mysql.escape_string(request.POST['min_donation'].strip()))
            except (TypeError, ValueError):
                # Narrowed from a bare 'except:' -- only parse failures are expected here.
                logging.error('live_results/daily_totals -- Could not process minimum donation for "%s" ' % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'

    """ === END POST === """

    # Load the query template from disk and substitute the filter values.
    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts], country=country, min_donation=min_donation, order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)

    html_table = DR.DataReporting()._write_html_table(results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html',
                              {'html_table' : html_table,
                               'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2),
                               'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)},
                              context_instance=RequestContext(request))
def execute_process(self, key, **kwargs):
    """
    Build and cache year-over-year fundraiser donation totals per country.

    For each top donor country (plus an aggregate 'Total'), pairs the 2011
    and 2010 'amount' time series produced by LongTermTrendsLoader, converts
    them to plottable data lists, and stores the resulting dict in the cache
    under *key* (replacing any previous entry).

    @param key: cache key under which the assembled data is stored
    @param kwargs: unused here; accepted for interface compatibility
    """
    logging.info('Commencing caching of fundraiser totals data at: %s' % self.CACHING_HOME)

    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 3)

    """ DATA CONFIG """

    """ set the metrics to plot """
    # NOTE(review): 'db1025' is presumably a replica DB host -- confirm against DL config.
    lttdl = DL.LongTermTrendsLoader(db='db1025')

    # Rank donor countries from the start of the 2011 fundraiser, and add an
    # aggregate 'Total' pseudo-country.
    start_of_2011_fundraiser = '20111116000000'
    countries = DL.CiviCRMLoader().get_ranked_donor_countries(start_of_2011_fundraiser)
    countries.append('Total')

    """ Dictionary object storing lists of regexes - each expression must pass for a label to persist """
    year_groups = dict()
    for country in countries:
        if cmp(country, 'Total') == 0:
            # 'Total' matches every label from the given year.
            year_groups['2011 Total'] = ['2011.*']
            year_groups['2010 Total'] = ['2010.*']
        else:
            # Per-country groups match labels of the form '<year><country>'.
            year_groups['2011 ' + country] = ['2011' + country]
            year_groups['2010 ' + country] = ['2010' + country]

    metrics = 'amount'
    weights = ''
    groups = year_groups
    group_metrics = ['year', 'country']
    metric_types = DL.LongTermTrendsLoader._MT_AMOUNT_
    include_totals = False
    include_others = False
    hours_back = 0
    time_unit = TP.DAY

    """ END CONFIG """

    """ For each metric use the LongTermTrendsLoader to generate the data to plot """
    dr = DR.DataReporting()

    # NOTE: 'run_fundrasing_totals' (sic) is the actual loader method name.
    times, counts = lttdl.run_fundrasing_totals(end_time, metric_name=metrics, metric_type=metric_types,
                                                groups=groups, group_metric=group_metrics,
                                                include_other=include_others,
                                                include_total=include_totals, hours_back=hours_back,
                                                weight_name=weights, time_unit=time_unit)
    dict_param = dict()

    # Pair the 2010/2011 series for each country and convert them into the
    # plottable structure cached for the front end.
    for country in countries:
        key_2011 = '2011 ' + country
        key_2010 = '2010 ' + country

        new_counts = dict()
        new_counts[key_2010] = counts[key_2010]
        new_counts[key_2011] = counts[key_2011]

        new_times = dict()
        new_times[key_2010] = times[key_2010]
        new_times[key_2011] = times[key_2011]

        dr._counts_ = new_counts
        dr._times_ = new_times

        # Placeholder series of zeros, sized to match one of the time axes.
        empty_data = [0] * len(new_times[new_times.keys()[0]])
        data = list()
        data.append(dr.get_data_lists([''], empty_data))

        dict_param[country] = Hlp.combine_data_lists(data)

    # Replace any stale cache entry with the freshly built data.
    self.clear_cached_data(key)
    self.cache_data(dict_param, key)

    logging.info('Caching complete.')
def execute_process(self, key, **kwargs):
    """
    Build and cache the live-results summary for recent banner/LP campaigns.

    Gathers confidence metrics, summary query results (optionally merged with
    one-step-test results), legend tables, and 5-minute interval donation
    series for campaigns matching the campaign regexp over the last
    self.DURATION_HRS hours, then stores everything in the cache under *key*.

    @param key: shelve/cache key under which the assembled dict is stored
    @param kwargs: unused here; accepted for interface compatibility
    """
    logging.info('Commencing caching of live results data at: %s' % self.CACHING_HOME)

    shelve_key = key

    """ Find the earliest and latest page views for a given campaign """
    lptl = DL.LandingPageTableLoader(db='db1025')

    query_name = 'report_summary_results_country.sql'
    query_name_1S = 'report_summary_results_country_1S.sql'
    # Campaigns of interest: names beginning with 'C_' or 'C11_'.
    campaign_regexp_filter = '^C_|^C11_'

    dl = DL.DataLoader(db='db1025')
    # Window covering the last DURATION_HRS hours, ending now (UTC).
    end_time, start_time = TP.timestamps_for_interval(datetime.datetime.utcnow(), 1, hours=-self.DURATION_HRS)

    """ Should a one-step query be used? """
    use_one_step = lptl.is_one_step(start_time, end_time, 'C11')  # Assume it is a one step test if there are no impressions for this campaign in the landing page table

    """
    Retrieve the latest time for which impressions have been loaded
    ===============================================================
    """
    sql_stmnt = 'select max(end_time) as latest_ts from squid_log_record where log_completion_pct = 100.00'

    results = dl.execute_SQL(sql_stmnt)
    latest_timestamp = results[0][0]
    latest_timestamp = TP.timestamp_from_obj(latest_timestamp, 2, 3)
    latest_timestamp_flat = TP.timestamp_convert_format(latest_timestamp, 2, 1)

    ret = DR.ConfidenceReporting(query_type='', hyp_test='', db='db1025').get_confidence_on_time_range(start_time, end_time, campaign_regexp_filter, one_step=use_one_step)
    measured_metrics_counts = ret[1]

    """ Prepare Summary results """
    sql_stmnt = Hlp.file_to_string(projSet.__sql_home__ + query_name)

    # The positional argument order below must match the '%s' placeholders in
    # the stored SQL template exactly -- do not reorder.
    sql_stmnt = sql_stmnt % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                             start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                             start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

    logging.info('Executing report_summary_results ...')

    results = dl.execute_SQL(sql_stmnt)
    column_names = dl.get_column_names()

    if use_one_step:

        logging.info('... including one step artifacts ...')

        sql_stmnt_1S = Hlp.file_to_string(projSet.__sql_home__ + query_name_1S)
        # Same placeholder ordering contract as the two-step template above.
        sql_stmnt_1S = sql_stmnt_1S % (start_time, latest_timestamp_flat, start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, \
                                       start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, start_time, end_time, campaign_regexp_filter, \
                                       start_time, latest_timestamp_flat, campaign_regexp_filter, start_time, latest_timestamp_flat, campaign_regexp_filter)

        results = list(results)
        results_1S = dl.execute_SQL(sql_stmnt_1S)

        """ Ensure that the results are unique """
        # Rows are keyed on their first three columns; one-step rows take
        # precedence over duplicate two-step rows.
        one_step_keys = list()
        for row in results_1S:
            one_step_keys.append(str(row[0]) + str(row[1]) + str(row[2]))

        new_results = list()
        for row in results:
            key = str(row[0]) + str(row[1]) + str(row[2])
            if not(key in one_step_keys):
                new_results.append(row)
        results = new_results

        results.extend(list(results_1S))

    metric_legend_table = DR.DataReporting().get_standard_metrics_legend()
    conf_legend_table = DR.ConfidenceReporting(query_type='bannerlp', hyp_test='TTest').get_confidence_legend_table()

    """ Create a interval loader objects """
    sampling_interval = 5 # 5 minute sampling interval for donation plots

    ir_cmpgn = DR.IntervalReporting(query_type=FDH._QTYPE_CAMPAIGN_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
    ir_banner = DR.IntervalReporting(query_type=FDH._QTYPE_BANNER_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')
    ir_lp = DR.IntervalReporting(query_type=FDH._QTYPE_LP_ + FDH._QTYPE_TIME_, generate_plot=False, db='db1025')

    """ Execute queries """
    ir_cmpgn.run(start_time, end_time, sampling_interval, 'donations', '',{})
    ir_banner.run(start_time, end_time, sampling_interval, 'donations', '',{})
    ir_lp.run(start_time, end_time, sampling_interval, 'donations', '',{})

    """ Prepare serialized objects """
    # Everything the live-results view needs is packed into one dict so it
    # can be cached/unpickled as a unit.
    dict_param = dict()

    dict_param['metric_legend_table'] = metric_legend_table
    dict_param['conf_legend_table'] = conf_legend_table

    dict_param['measured_metrics_counts'] = measured_metrics_counts
    dict_param['results'] = results
    dict_param['column_names'] = column_names

    dict_param['interval'] = sampling_interval
    dict_param['duration'] = self.DURATION_HRS

    dict_param['start_time'] = TP.timestamp_convert_format(start_time,1,2)
    dict_param['end_time'] = TP.timestamp_convert_format(end_time,1,2)

    dict_param['ir_cmpgn_counts'] = ir_cmpgn._counts_
    dict_param['ir_banner_counts'] = ir_banner._counts_
    dict_param['ir_lp_counts'] = ir_lp._counts_

    dict_param['ir_cmpgn_times'] = ir_cmpgn._times_
    dict_param['ir_banner_times'] = ir_banner._times_
    dict_param['ir_lp_times'] = ir_lp._times_

    # Replace any stale cache entry with the freshly built data.
    self.clear_cached_data(shelve_key)
    self.cache_data(dict_param, shelve_key)

    logging.info('Caching complete.')
def process_filter_data(request):
    """
    Extract and validate the 'earliest_utc_ts' / 'latest_utc_ts' POST fields.

    Missing or empty fields fall back to the default four-hour window ending
    now (UTC).  Invalid timestamps also fall back to the defaults and set an
    error message.

    @return: (err_msg, earliest_utc_ts_var, latest_utc_ts_var) -- err_msg is
             '' on success; the timestamps are strings in TP's flat format.
    """
    err_msg = ''

    # Default window: four hours back from now (UTC).
    time_curr = datetime.datetime.utcnow()
    time_dayback = time_curr + datetime.timedelta(hours = -4)

    _beginning_time_ = TP.timestamp_from_obj(time_dayback, 1, 3)
    _end_time_ = TP.timestamp_from_obj(time_curr, 1, 3)

    """ PROCESS POST VARS
        ================= """
    try:
        latest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['latest_utc_ts'].strip())

        # BUG FIX: treat an empty field as "use the default" *before*
        # validating.  Previously the emptiness check sat after the
        # is_timestamp() guard, so '' raised TypeError first and a blank
        # field produced a spurious "valid end-timestamp" error.
        if latest_utc_ts_var == '':
            latest_utc_ts_var = _end_time_
        else:
            if not(TP.is_timestamp(latest_utc_ts_var, 1)) and not(TP.is_timestamp(latest_utc_ts_var, 2)):
                raise TypeError

            # Normalize FORMAT1 input to the flat timestamp format.
            ts_format = TP.getTimestampFormat(latest_utc_ts_var)
            if ts_format == TP.TS_FORMAT_FORMAT1:
                latest_utc_ts_var = TP.timestamp_convert_format(latest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

    except KeyError:
        # Field absent from the form: silently use the default.
        latest_utc_ts_var = _end_time_
    except TypeError:
        err_msg = 'Please enter a valid end-timestamp.'
        latest_utc_ts_var = _end_time_

    try:
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['earliest_utc_ts'].strip())

        # Same ordering fix as above: empty means "use the default".
        if earliest_utc_ts_var == '':
            earliest_utc_ts_var = _beginning_time_
        else:
            if not(TP.is_timestamp(earliest_utc_ts_var, 1)) and not(TP.is_timestamp(earliest_utc_ts_var, 2)):
                raise TypeError

            ts_format = TP.getTimestampFormat(earliest_utc_ts_var)
            if ts_format == TP.TS_FORMAT_FORMAT1:
                earliest_utc_ts_var = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)

    except KeyError:
        earliest_utc_ts_var = _beginning_time_
    except TypeError:
        err_msg = 'Please enter a valid start-timestamp.'
        earliest_utc_ts_var = _beginning_time_

    return err_msg, earliest_utc_ts_var, latest_utc_ts_var
def daily_totals(request):
    """
    Render the daily donation totals-by-country report.

    Optional POST filters:
      start_day_ts / end_day_ts -- report window timestamps (default: the
                                   last 24 hours, UTC)
      country                   -- two-letter country code (default: any)
      min_donation              -- minimum donation amount (default 0)
      order_metric              -- 'Date' or 'Country' row ordering

    Returns the rendered 'live_results/daily_totals.html' response.
    """
    err_msg = ''

    # Default window: the last 24 hours (UTC).
    start_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow() + datetime.timedelta(days=-1), 1, 0)
    end_day_ts = TP.timestamp_from_obj(datetime.datetime.utcnow(), 1, 0)
    country = '.{2}'                        # regex fragment: any two-character country code
    min_donation = 0
    order_str = 'order by 1 desc,3 desc'

    """ PROCESS POST """

    if 'start_day_ts' in request.POST:
        if cmp(request.POST['start_day_ts'], '') != 0:
            start_day_ts = MySQLdb._mysql.escape_string(request.POST['start_day_ts'].strip())
            # Renamed from 'format' to avoid shadowing the builtin.
            ts_format = TP.getTimestampFormat(start_day_ts)

            if ts_format == 2:
                start_day_ts = TP.timestamp_convert_format(start_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'Start timestamp is formatted incorrectly\n'

    if 'end_day_ts' in request.POST:
        if cmp(request.POST['end_day_ts'], '') != 0:
            end_day_ts = MySQLdb._mysql.escape_string(request.POST['end_day_ts'].strip())
            # BUG FIX: validate the *end* timestamp -- the original inspected
            # start_day_ts here, so a malformed end_day_ts slipped through
            # unvalidated and unconverted.
            ts_format = TP.getTimestampFormat(end_day_ts)

            if ts_format == 2:
                end_day_ts = TP.timestamp_convert_format(end_day_ts, 2, 1)
            elif ts_format == -1:
                err_msg = err_msg + 'End timestamp is formatted incorrectly\n'

    if 'country' in request.POST:
        if cmp(request.POST['country'], '') != 0:
            country = MySQLdb._mysql.escape_string(request.POST['country'])

    if 'min_donation' in request.POST:
        if cmp(request.POST['min_donation'], '') != 0:
            try:
                min_donation = int(MySQLdb._mysql.escape_string(request.POST['min_donation'].strip()))
            except (TypeError, ValueError):
                # Narrowed from a bare 'except:' -- only parse failures are expected here.
                logging.error('live_results/daily_totals -- Could not process minimum donation for "%s" ' % request.POST['min_donation'].strip())
                min_donation = 0

    if 'order_metric' in request.POST:
        if cmp(request.POST['order_metric'], 'Date') == 0:
            order_str = 'order by 1 desc,3 desc'
        elif cmp(request.POST['order_metric'], 'Country') == 0:
            order_str = 'order by 2 asc,1 desc'

    """ === END POST === """

    # Load the query template from disk and substitute the filter values.
    query_name = 'report_daily_totals_by_country'
    filename = projSet.__sql_home__ + query_name + '.sql'
    sql_stmnt = Hlp.file_to_string(filename)
    sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_day_ts, end_day_ts], country=country, min_donation=min_donation, order_str=order_str)

    dl = DL.DataLoader()
    results = dl.execute_SQL(sql_stmnt)

    html_table = DR.DataReporting()._write_html_table(results, dl.get_column_names(), use_standard_metric_names=True)

    return render_to_response('live_results/daily_totals.html',
                              {'html_table' : html_table,
                               'start_time' : TP.timestamp_convert_format(start_day_ts, 1, 2),
                               'end_time' : TP.timestamp_convert_format(end_day_ts, 1, 2)},
                              context_instance=RequestContext(request))
def index(request, **kwargs):
    """
    Render the campaign totals landing page.

    Lists campaigns active in the query window (default: the last 24 hours,
    UTC), sorted by most recent earliest-access timestamp.  Optional POST
    filters:
      utc_ts        -- earliest timestamp to include (FORMAT1 or flat)
      min_donations -- minimum donation count for a campaign to be listed

    @param kwargs: may carry {'kwargs': {'err_msg': ...}} from an internal
                   redirect; used to seed the displayed error message.

    Returns the rendered 'campaigns/index.html' response.
    """
    crl = DL.CampaignReportingLoader(query_type='totals')
    filter_data = True

    """ Determine the start and end times for the query """
    start_time_obj = datetime.datetime.utcnow() + datetime.timedelta(days=-1)
    end_time = TP.timestamp_from_obj(datetime.datetime.utcnow(),1,3)
    start_time = TP.timestamp_from_obj(start_time_obj,1,3)

    """ PROCESS POST KWARGS
        =================== """

    err_msg = ''
    try:
        err_msg = str(kwargs['kwargs']['err_msg'])
    except (KeyError, TypeError):
        # Narrowed from a bare 'except:' -- an absent or non-dict 'kwargs'
        # entry simply means there is no message to carry over.
        pass

    """ PROCESS POST VARS
        ================= """

    """ Process error message """
    try:
        err_msg = MySQLdb._mysql.escape_string(request.POST['err_msg'])
    except KeyError:
        pass

    """ If the filter form was submitted extract the POST vars """
    try:
        min_donations_var = MySQLdb._mysql.escape_string(request.POST['min_donations'].strip())
        earliest_utc_ts_var = MySQLdb._mysql.escape_string(request.POST['utc_ts'].strip())

        """ If the user timestamp is earlier than the default start time run the query for the earlier start time """
        ts_format = TP.getTimestampFormat(earliest_utc_ts_var)

        """ Ensure the validity of the timestamp input """
        if ts_format == TP.TS_FORMAT_FORMAT1:
            start_time = TP.timestamp_convert_format(earliest_utc_ts_var, TP.TS_FORMAT_FORMAT1, TP.TS_FORMAT_FLAT)
        elif ts_format == TP.TS_FORMAT_FLAT:
            start_time = earliest_utc_ts_var
        elif cmp(earliest_utc_ts_var, '') == 0:
            start_time = TP.timestamp_from_obj(start_time_obj,1,3)
        else:
            raise Exception()

        if cmp(min_donations_var, '') == 0:
            min_donations_var = -1
        else:
            min_donations_var = int(min_donations_var)

    except KeyError:
        # Form not submitted: keep the default window and disable the
        # minimum-donations filter.
        min_donations_var = -1
    except Exception:
        # Malformed filter input (bad timestamp or non-integer minimum):
        # fall back to defaults and notify the user.
        min_donations_var = -1
        start_time = TP.timestamp_from_obj(start_time_obj,1,3)
        err_msg = 'Filter fields are incorrect.'

    """ GENERATE CAMPAIGN DATA
        ====================== """
    campaigns, all_data = crl.run_query({'metric_name' : 'earliest_timestamp', 'start_time' : start_time, 'end_time' : end_time})

    """ Sort campaigns by earliest access """
    sorted_campaigns = sorted(campaigns.iteritems(), key=operator.itemgetter(1))
    sorted_campaigns.reverse()

    """ FILTER CAMPAIGN DATA
        ==================== """
    new_sorted_campaigns = list()
    for campaign in sorted_campaigns:
        key = campaign[0]
        if campaign[1] > 0:
            name = all_data[key][0]
            # Identity comparison ('is None') replaces the original '== None'.
            if name is None:
                name = 'none'
            timestamp = TP.timestamp_convert_format(all_data[key][3], 1, 2)
            # One guarded append replaces the original's duplicated append in
            # both branches of 'if filter_data:'.
            if not filter_data or all_data[key][2] > min_donations_var:
                new_sorted_campaigns.append([campaign[0], campaign[1], name, timestamp, all_data[key][2], all_data[key][4]])

    sorted_campaigns = new_sorted_campaigns

    return render_to_response('campaigns/index.html', {'campaigns' : sorted_campaigns, 'err_msg' : err_msg}, context_instance=RequestContext(request))