def _telem_parsed_to_sql(date_str, channel, measure_iter,
                         version=None, query_file=_UPDATE_SQL_FILE):
    """Load parsed weekly telemetry measures into the sentiment database.

    Args:
        date_str:     'YYYY-MM-DD' date used to resolve the release version
                      and passed to the update SQL as :week.
        channel:      Release channel name; interpolated into the
                      release_info column names (<channel>_start_date /
                      <channel>_end_date).
        measure_iter: Iterable of measure rows inserted into
                      tmp_weekly_stats (column order given by `mappings`).
        version:      Product version; looked up from sentiment.release_info
                      when falsy.
        query_file:   Path of the SQL file run against the populated table.
    """
    db = Db('telemetry', is_persistent=True)

    # Resolve the version from the release calendar when not supplied.
    if not version:
        # `channel` names columns, so it must be interpolated (identifiers
        # cannot be bound parameters); the date is bound normally instead of
        # being spliced into the SQL text as the original did.
        query = """
            SELECT version
            FROM sentiment.release_info ri
            WHERE :date_str >= ri.{channel}_start_date
              AND :date_str <= ri.{channel}_end_date
            ;""".format(channel=channel)
        version = str(db.execute_sql(query, {'date_str': date_str}).first()[0])

    # TODO(rrayborn): figure out why this won't work as a temp table with our
    # consistency rules + sqlalchemy's implicit transactions
    db.execute_sql('DROP TABLE IF EXISTS tmp_weekly_stats;')
    query = '''CREATE TABLE tmp_weekly_stats (
            os                      ENUM('Windows','Mac','Linux'),
            measure                 VARCHAR(200),
            measure_value           VARCHAR(200),
            users                   INT,
            measure_average         FLOAT,
            measure_nonzero_average FLOAT,
            active_users            FLOAT,
            potential_users         INT
        );'''
    db.execute_sql(query)

    # Positional layout of each incoming measure row -> table column.
    mappings = {
        0: 'measure',
        1: 'os',
        2: 'measure_average',
        3: 'measure_nonzero_average',
        4: 'active_users',
        5: 'potential_users',
        6: 'measure_value',
    }
    db.insert_data_into_table(measure_iter, 'tmp_weekly_stats', mappings)

    # Run the update statements against the populated table.  The original
    # wrapped the read in `if query_sql:`, but an open file object is always
    # truthy, so that guard was dead code and has been removed.
    with open(query_file, 'r') as query_sql:
        query = query_sql.read()
    db.execute_sql(query,
                   {'week': date_str, 'channel': channel, 'version': version})
def update(product='both',
           start_date=None,
           end_date=None,
           ua_db=None,
           input_db=None,
           sumo_db=None):
    """Rebuild the sentiment stats tables for a date window.

    Pulls heartbeat/input, SUMO, ADI, Google Analytics, and Play-review data
    into temporary tables on the UA database, then runs the per-product
    stats queries.

    Args:
        product:    'desktop', 'mobile', or 'both' (default).
        start_date: 'YYYY-MM-DD'; defaults to 15 days ago.
        end_date:   'YYYY-MM-DD'; defaults to yesterday.
        ua_db:      Optional UA_DB handle; created (with autocommit) if None.
        input_db:   Optional Input_DB handle; created if None.
        sumo_db:    Optional Sumo_DB handle; created if None.
    """
    # Compute date defaults at call time.  The previous defaults were
    # evaluated once at module import, so a long-running process would keep
    # reusing stale dates.
    if start_date is None:
        start_date = (date.today() - timedelta(days=15)).strftime('%Y-%m-%d')
    if end_date is None:
        end_date = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')

    if not ua_db:
        ua_db = UA_DB('sentiment', is_persistent=True)
        # Fresh connection: commit every statement immediately.
        ua_db.execute_sql('SET autocommit=1;')
    if not input_db:
        input_db = Input_DB(is_persistent=True)
    if not sumo_db:
        sumo_db = Sumo_DB(is_persistent=True)

    # Per-product scratch files (hidden ".<product>_*.tsv").
    # NOTE(review): the original also bound `input_file` to the same
    # '..._adis.tsv' path as adis_file; it was never read, so the unused
    # local (an apparent copy/paste slip) was dropped.
    adis_file = _DATA_PATH + '.' + product + '_adis.tsv'
    visits_file = _DATA_PATH + '.' + product + '_visits.tsv'

    params = {'start_date': start_date, 'end_date': end_date}

    # =========== Create tables ================================================
    _execute_query(ua_db, _CREATE_SQL_FILE, params=params, multi=True)

    # =========== Parse heartbeat data =========================================
    # TODO(rrayborn): should this be its own pipeline?
    data = _execute_query(input_db, _HEARTBEAT_SQL_FILE, params=params)
    header = data.keys()
    ua_db.insert_data_into_table(data, 'daily_heartbeat_stats', header,
                                 has_header=False, is_replace=True)

    # =========== Parse input data =============================================
    data = _execute_query(input_db, _INPUT_SQL_FILE, params=params)
    query = '''CREATE TEMPORARY TABLE tmp_input (
            `date`                     DATE,
            `version`                  INT,
            `is_desktop`               BOOL,
            `input_average`            FLOAT,
            `input_volume`             INT,
            `heartbeat_average`        FLOAT,
            `heartbeat_surveyed_users` INT,
            `heartbeat_volume`         INT
        );'''
    ua_db.execute_sql(query)
    header = data.keys()
    ua_db.insert_data_into_table(data, 'tmp_input', header, has_header=False)

    # =========== Create base table ============================================
    _execute_query(ua_db, _BASE_SQL_FILE, params=params)

    # =========== Parse sumo data ==============================================
    data = _execute_query(sumo_db, _SUMO_SQL_FILE, params=params)
    query = '''CREATE TEMPORARY TABLE tmp_sumo (
            `date`              DATE,
            `version`           INT,
            `is_desktop`        BOOL,
            `num_unanswered_72` INT,
            `num_posts`         INT
        );'''
    ua_db.execute_sql(query)
    header = data.keys()
    ua_db.insert_data_into_table(data, 'tmp_sumo', header, has_header=False)

    # =========== Parse ADI data ===============================================
    # TODO(rrayborn): Need to investigate why part of the end date is missing.
    #                 Doesn't seem to affect the start_date...
    today_minus_three = (date.today() - timedelta(days=3)).strftime('%Y-%m-%d')
    adi_end_date = min(today_minus_three, end_date)
    with open(_ADIS_SQL_FILE, 'r') as adis_sql:
        query = adis_sql.read().replace('\n', ' ') % (start_date, adi_end_date)
    # SECURITY(review): `query` and `adis_file` are interpolated into a shell
    # command run with shell=True (required for the pipe/redirect).  Inputs
    # here are internal SQL files and module constants, but do not pass
    # untrusted values through this path.
    cmd = 'echo "%s" | isql -v metrics_dsn -b -x0x09 >%s'  # | tail -n+10'
    check_output(cmd % (query, adis_file), shell=True)
    query = '''CREATE TEMPORARY TABLE tmp_adis (
            `date`     DATE,
            version    INT,
            is_desktop BOOL,
            num_adis   INT
        );'''
    ua_db.execute_sql(query)
    header = ['date', 'version', 'is_desktop', 'num_adis']
    ua_db.insert_csv_into_table(adis_file, 'tmp_adis', header, delimiter='\t')

    # =========== Parse Analytics data =========================================
    google_analytics.generate_inproduct(db=ua_db,
                                        device_type=product,
                                        filename=visits_file,
                                        start_date=start_date,
                                        end_date=end_date)
    query = '''CREATE TEMPORARY TABLE tmp_sumo_visits (
            `date`     DATE,
            version    INT,
            is_desktop BOOL,
            visits     INT
        );'''
    ua_db.execute_sql(query)
    header = ['date', 'version', 'is_desktop', 'visits']
    ua_db.insert_csv_into_table(visits_file, 'tmp_sumo_visits', header,
                                delimiter='\t')

    # =========== Parse Play data ==============================================
    query = '''CREATE TEMPORARY TABLE tmp_play AS
        SELECT
            `date`,
            version,
            AVG(rating) AS play_average,
            COUNT(*)    AS play_volume
        FROM google_play_reviews
        WHERE `date` >= :start_date
            AND `date` <= :end_date
        GROUP BY 1,2;
        '''
    ua_db.execute_sql(query, params)

    # =========== Run Stats query ==============================================
    query_files = []
    if product == 'both' or product == 'desktop':
        query_files.append(_DESKTOP_FILE_PATTERN)
    if product == 'both' or product == 'mobile':
        query_files.append(_MOBILE_FILE_PATTERN)
    for query_file in query_files:
        _execute_query(ua_db, query_file, params=params)
def update(product='both',
           start_date=(date.today() - timedelta(days=15)).strftime('%Y-%m-%d'),
           end_date=(date.today() - timedelta(days=1)).strftime('%Y-%m-%d'),
           ua_db=None,
           input_db=None,
           sumo_db=None):
    """Rebuild the sentiment stats tables for the [start_date, end_date] window.

    Loads heartbeat/input, SUMO, ADI, Google Analytics, and Play-review data
    into temporary tables on the UA database, then runs the per-product stats
    queries.  DB handles are created on demand when not supplied.
    """
    # NOTE(review): this is a second, reformatted definition of update(); at
    # import time it shadows the earlier definition of the same name.
    # NOTE(review): the start_date/end_date defaults are evaluated once at
    # module import, so a long-running process will reuse stale dates.
    if not ua_db:
        ua_db = UA_DB('sentiment', is_persistent=True)
        # Fresh connection: commit every statement immediately.
        ua_db.execute_sql('SET autocommit=1;')
    if not input_db:
        input_db = Input_DB(is_persistent=True)
    if not sumo_db:
        sumo_db = Sumo_DB(is_persistent=True)
    # Per-product scratch-file paths (hidden ".<product>_*.tsv" files).
    adis_file = _DATA_PATH + '.' + product + '_adis.tsv'
    # NOTE(review): input_file duplicates adis_file ('_adis.tsv') and is never
    # read below — looks like a copy/paste slip; confirm before removing.
    input_file = _DATA_PATH + '.' + product + '_adis.tsv'
    visits_file = _DATA_PATH + '.' + product + '_visits.tsv'
    # Bound parameters shared by the SQL files executed below.
    params = {'start_date': start_date, 'end_date': end_date}

    # =========== Create tables ================================================
    _execute_query(ua_db, _CREATE_SQL_FILE, params=params, multi=True)

    # =========== Parse heartbeat data =========================================
    # TODO(rrayborn): should this be its own pipeline?
    # Fetch Input data
    data = _execute_query(input_db, _HEARTBEAT_SQL_FILE, params=params)
    # Insert Input data
    header = data.keys()
    ua_db.insert_data_into_table(data, 'daily_heartbeat_stats', header,
                                 has_header=False, is_replace=True)

    # =========== Parse input data =============================================
    # Fetch Input data
    data = _execute_query(input_db, _INPUT_SQL_FILE, params=params)
    # Create tmp_input table
    query = '''CREATE TEMPORARY TABLE tmp_input (
            `date`                     DATE,
            `version`                  INT,
            `is_desktop`               BOOL,
            `input_average`            FLOAT,
            `input_volume`             INT,
            `heartbeat_average`        FLOAT,
            `heartbeat_surveyed_users` INT,
            `heartbeat_volume`         INT
        );'''
    ua_db.execute_sql(query)
    # Insert Input data
    header = data.keys()
    ua_db.insert_data_into_table(data, 'tmp_input', header, has_header=False)

    # =========== Create base table ============================================
    _execute_query(ua_db, _BASE_SQL_FILE, params=params)

    # =========== Parse sumo data ==============================================
    # Fetch Sumo data
    data = _execute_query(sumo_db, _SUMO_SQL_FILE, params=params)
    # Create tmp_sumo table
    query = '''CREATE TEMPORARY TABLE tmp_sumo (
            `date`              DATE,
            `version`           INT,
            `is_desktop`        BOOL,
            `num_unanswered_72` INT,
            `num_posts`         INT
        );'''
    ua_db.execute_sql(query)
    # Insert Sumo data
    header = data.keys()
    ua_db.insert_data_into_table(data, 'tmp_sumo', header, has_header=False)

    # =========== Parse ADI data ===============================================
    # Generate query
    # TODO(rrayborn): Need to investigate why part of the end date is missing.
    #                 Doesn't seem to affect the start_date...
    today_minus_three = (date.today() - timedelta(days=3)).strftime('%Y-%m-%d')
    adi_end_date = min(today_minus_three, end_date)
    with open(_ADIS_SQL_FILE, 'r') as adis_sql:
        query = adis_sql.read().replace('\n', ' ') % (start_date, adi_end_date)
    # Generate/execute command line
    # NOTE(review): `query`/`adis_file` are interpolated into a shell string
    # run with shell=True — shell-injectable if either ever carries untrusted
    # input; currently both come from internal files/constants.
    cmd = 'echo "%s" | isql -v metrics_dsn -b -x0x09 >%s'  # | tail -n+10'
    check_output(cmd % (query, adis_file), shell=True)
    # Create tmp table
    query = '''CREATE TEMPORARY TABLE tmp_adis (
            `date`     DATE,
            version    INT,
            is_desktop BOOL,
            num_adis   INT
        );'''
    ua_db.execute_sql(query)
    header = ['date', 'version', 'is_desktop', 'num_adis']
    ua_db.insert_csv_into_table(adis_file, 'tmp_adis', header, delimiter='\t')

    # =========== Parse Analytics data =========================================
    # Get Google analytics data
    google_analytics.generate_inproduct(db=ua_db,
                                        device_type=product,
                                        filename=visits_file,
                                        start_date=start_date,
                                        end_date=end_date)
    # Create tmp table
    query = '''CREATE TEMPORARY TABLE tmp_sumo_visits (
            `date`     DATE,
            version    INT,
            is_desktop BOOL,
            visits     INT
        );'''
    ua_db.execute_sql(query)
    header = ['date', 'version', 'is_desktop', 'visits']
    ua_db.insert_csv_into_table(visits_file, 'tmp_sumo_visits', header,
                                delimiter='\t')

    # =========== Parse Play data ==============================================
    query = '''CREATE TEMPORARY TABLE tmp_play AS
        SELECT
            `date`,
            version,
            AVG(rating) AS play_average,
            COUNT(*)    AS play_volume
        FROM google_play_reviews
        WHERE `date` >= :start_date
            AND `date` <= :end_date
        GROUP BY 1,2;
        '''
    ua_db.execute_sql(query, params)

    # =========== Run Stats query ==============================================
    query_files = []
    if product == 'both' or product == 'desktop':
        query_files.append(_DESKTOP_FILE_PATTERN)
    if product == 'both' or product == 'mobile':
        query_files.append(_MOBILE_FILE_PATTERN)
    for query_file in query_files:
        _execute_query(ua_db, query_file, params=params)