示例#1
0
def getLastUpdateDates(db_conf_file, sc_schema, resources):
	"""Fetch the last processing datetime registered in SCAI for each resource.

	For every source table name in ``resources`` the matching target table
	name is derived, its SCAI process short description is looked up, and
	the ``last_processing_datetime`` of the active integration-process row
	(for COD_COUNTRY / COD_INTEGRATION) is read.

	Args:
		db_conf_file: Configuration handed to getDatabaseConnection() and to
			scai.getProcessShortDescription().
		sc_schema: Not used by this function; kept for call compatibility.
		resources: Iterable of source table names (e.g. 'stg_d_base_contacts').

	Returns:
		dict mapping each resource name to the ISO-8601 string of its last
		processing datetime.

	Raises:
		LookupError: if no active integration-process row exists for a resource.
	"""
	print('Getting last update dates...')
	conn = getDatabaseConnection(db_conf_file)
	cur = conn.cursor()

	last_updates_dict = dict()
	try:
		for resource in resources:
			target_table_name = 'stg_' + COUNTRY + '_' + resource[4:]	# Target table name has the country in the middle of the source table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
			scai_process_name = scai.getProcessShortDescription(db_conf_file, target_table_name)
			cur.execute(
				"SELECT isnull(rel_integr_proc.last_processing_datetime,'1900-01-01 00:00:00.000000') "
				"FROM crm_integration_anlt.t_lkp_scai_process proc, crm_integration_anlt.t_rel_scai_integration_process rel_integr_proc "
				"WHERE rel_integr_proc.cod_process = proc.cod_process "
				"AND rel_integr_proc.cod_country = %(COD_COUNTRY)d "
				"AND rel_integr_proc.cod_integration = %(COD_INTEGRATION)d "
				"AND rel_integr_proc.ind_active = 1 "
				"AND proc.dsc_process_short = '%(scai_process_name)s' "
				% {
					'COD_COUNTRY':COD_COUNTRY,
					'COD_INTEGRATION':COD_INTEGRATION,
					'scai_process_name':scai_process_name
				}
			)
			row = cur.fetchone()
			if row is None:
				# Without this check the failure would surface as an opaque
				# "'NoneType' object is not subscriptable" TypeError.
				raise LookupError(
					"No active SCAI integration process found for '%s' (table %s)"
					% (scai_process_name, target_table_name)
				)
			last_updates_dict[resource] = row[0].isoformat()
			print('\t' + target_table_name + ': ' + last_updates_dict[resource])
	finally:
		# Release the cursor and connection even when a lookup or query fails.
		cur.close()
		conn.close()

	return last_updates_dict
示例#2
0
def copyFromDatabaseToS3(source_conf, target_conf, resources, schema, last_updates_dict, aux_path, scai_last_execution_status=1):
	"""Unload the incremental rows of each source table to S3.

	For every resource an UNLOAD statement is run on the source database.
	It exports the rows of ``schema.resource`` whose ``meta_event_time`` is
	at or after ``last_updates_dict[resource]`` and whose
	``base_account_country`` equals BASE_ACCOUNT_COUNTRY, writing them (with
	a manifest) under ``s3://<aux_path>/<schema>_<resource>/data_``.

	Args:
		source_conf: Configuration handed to getDatabaseConnection() and
			getS3Keys() for the source database.
		target_conf: Configuration handed to the SCAI bookkeeping calls.
		resources: Iterable of source table names.
		schema: Source schema name.
		last_updates_dict: Maps each resource to its lower-bound timestamp string.
		aux_path: S3 bucket/prefix used as the staging area.
		scai_last_execution_status: 1 for a normal run; 3 when re-running
			after an error, in which case resources are skipped until the one
			whose SCAI process status is 3 is reached.

	Raises:
		SystemExit: when an UNLOAD fails, after rolling back and flagging the
			process and the integration with status 3 in SCAI.
	"""
	print('Connecting to Chandra...')
	conn = getDatabaseConnection(source_conf)
	cur = conn.cursor()

	try:
		credentials = getS3Keys(source_conf)

		#UNLOAD resources data
		print('Unloading from Chandra...')
		for resource in resources:
			print('\t' + resource + ": " + last_updates_dict[resource])
			tg_table = 'stg_' + COUNTRY + '_' + resource[4:]	# Target table name has the country in the middle of the source table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
			scai_process_name = scai.getProcessShortDescription(target_conf, tg_table)				# SCAI

			if scai_last_execution_status == 3:
				scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)	# SCAI

			# Is normal execution or re-execution starting from the step that was in error
			if not (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
				continue

			scai.processStart(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY)			# SCAI
			try:
				cur.execute(
					"UNLOAD ('SELECT * from %(schema)s.%(resource)s "
					"		WHERE meta_event_time >= \\\'%(last_update_date)s\\\' "
					"		AND base_account_country = \\\'%(BASE_ACCOUNT_COUNTRY)s\\\'') "
					"TO 's3://%(aux_path)s/%(schema)s_%(resource)s/data_' "
					"CREDENTIALS '%(credentials)s' "
					"ESCAPE "
					"manifest;"
					% {
						'schema':schema,
						'resource':resource,
						'last_update_date':last_updates_dict[resource],
						'credentials':credentials,
						'aux_path':aux_path,
						'BASE_ACCOUNT_COUNTRY':BASE_ACCOUNT_COUNTRY
					}
				)
			except Exception as e:
				conn.rollback()
				scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 3)	# SCAI
				scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3)		# SCAI
				print(e)
				# Only database-driver errors carry pgerror; other exceptions
				# must not turn the abort into an AttributeError.
				if hasattr(e, 'pgerror'):
					print(e.pgerror)
				sys.exit("The process aborted with error.")
			else:
				conn.commit()
				# NOTE(review): processEnd is not called on success here;
				# presumably the matching load step closes the process - confirm.

				#Enable execution of following processes
				scai_last_execution_status = 1
	finally:
		#Close connection (also on the abort path)
		cur.close()
		conn.close()
示例#3
0
def copyFromS3ToDatabase(target_conf, resources, sc_schema, tg_schema, aux_path, scai_last_execution_status=1, region='us-west-2'):
	"""Load the files staged in S3 into the target staging tables.

	For every resource the target table is truncated, reloaded with COPY
	from ``s3://<aux_path>/<sc_schema>_<resource>/data_manifest`` and
	analyzed, all in one execute() call.

	Args:
		target_conf: Configuration handed to getDatabaseConnection(),
			getS3Keys() and the SCAI bookkeeping calls.
		resources: Iterable of source table names.
		sc_schema: Source schema name (part of the S3 key).
		tg_schema: Target schema name.
		aux_path: S3 bucket/prefix used as the staging area.
		scai_last_execution_status: 1 for a normal run; 3 when re-running
			after an error, in which case resources are skipped until the one
			whose SCAI process status is 3 is reached.
		region: AWS region passed to COPY's REGION option.

	Raises:
		SystemExit: when a load fails, after rolling back and flagging the
			process and the integration with status 3 in SCAI.
	"""
	#LOAD to target redshift
	print('Connecting to Yamato...')
	conn_target = getDatabaseConnection(target_conf)
	cur_target = conn_target.cursor()

	try:
		credentials = getS3Keys(target_conf)

		print('Loading to Yamato...')
		for resource in resources:
			tg_table = 'stg_' + COUNTRY + '_' + resource[4:]	# Target table name has the country in the middle of the source table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
			print('Loading %(tg_schema)s.%(tg_table)s...' % {'tg_schema':tg_schema, 'tg_table':tg_table })
			scai_process_name = scai.getProcessShortDescription(target_conf, tg_table)				# SCAI

			if scai_last_execution_status == 3:
				scai_process_status = scai.processCheck(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)	# SCAI

			# Is normal execution or re-execution starting from the step that was in error
			if not (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
				continue

			# NOTE(review): no processStart here; presumably the process was
			# started by the unload step - confirm.
			try:
				cur_target.execute(
					"TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
					"COPY %(tg_schema)s.%(tg_table)s "
					"FROM 's3://%(aux_path)s/%(sc_schema)s_%(resource)s/data_manifest' "
					"CREDENTIALS '%(credentials)s' "
					"REGION '%(region)s' "
					"ESCAPE "
					"manifest; "
					"ANALYZE %(tg_schema)s.%(tg_table)s;"
					% {
						'tg_schema':tg_schema,
						'tg_table':tg_table,
						'resource':resource,
						'credentials':credentials,
						'aux_path':aux_path,
						'sc_schema':sc_schema,
						'region':region
					}
				)
			except Exception as e:
				conn_target.rollback()
				scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 3)	# SCAI
				scai.integrationEnd(target_conf, COD_INTEGRATION, COD_COUNTRY, 3)		# SCAI
				print(e)
				# Only database-driver errors carry pgerror; other exceptions
				# must not turn the abort into an AttributeError.
				if hasattr(e, 'pgerror'):
					print(e.pgerror)
				sys.exit("The process aborted with error.")
			else:
				conn_target.commit()
				scai.processEnd(target_conf, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'meta_event_time', 1)	# SCAI

				#Enable execution of following processes
				scai_last_execution_status = 1
	finally:
		# Release the cursor and connection on success and on abort alike.
		cur_target.close()
		conn_target.close()
示例#4
0
def copyBaseTables(db_conf_file, sc_schema, tg_schema, resources, last_updates_dict, verticals_names='', scai_last_execution_status=1):
	"""Reload each target staging table from its source table in the same database.

	For every resource the target table is truncated, refilled with the rows
	of ``sc_schema.resource`` whose ``operation_timestamp`` is at or after
	``last_updates_dict[resource]``, and analyzed.

	Args:
		db_conf_file: Configuration handed to getDatabaseConnection() and to
			the SCAI lookup calls.
		sc_schema: Source schema name.
		tg_schema: Target schema name.
		resources: Iterable of source table names.
		last_updates_dict: Maps each resource to its lower-bound timestamp string.
		verticals_names: Not used by this function; kept for call compatibility.
		scai_last_execution_status: 1 for a normal run; 3 when re-running
			after an error, in which case resources are skipped until the one
			whose SCAI process status is 3 is reached.

	Returns:
		The (possibly updated) execution status, to be passed to the
		subsequent processes.

	Raises:
		SystemExit: when a load fails, after rolling back the transaction.
	"""
	print('Connecting to Yamato...')
	conn_target = getDatabaseConnection(db_conf_file)
	cur_target = conn_target.cursor()

	try:
		for resource in resources:
			tg_table = 'stg_' + COUNTRY + '_' + resource[4:]	# Target table name has the country in the middle of the source table name (for example, stg_d_base_contacts -> stg_pt_d_base_contacts)
			scai_process_name = scai.getProcessShortDescription(db_conf_file, tg_table)			# SCAI
			if scai_last_execution_status == 3:
				scai_process_status = scai.processCheck(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, scai_last_execution_status)	# SCAI

			# Is normal execution or re-execution starting from the step that was in error
			if not (scai_last_execution_status == 1 or (scai_last_execution_status == 3 and scai_process_status == 3)):
				continue

			# NOTE(review): the SCAI start/end calls below are disabled in this
			# step - confirm that is intentional.
			#scai.processStart(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY)	# SCAI
			print('Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...' % {'tg_schema':tg_schema, 'tg_table':tg_table, 'last_update':last_updates_dict[resource]})
			try:
				cur_target.execute(
					"TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
					"INSERT INTO %(tg_schema)s.%(tg_table)s "
					"SELECT * FROM %(sc_schema)s.%(resource)s "
					"WHERE operation_timestamp >= '%(last_update_date)s'; "
					"ANALYZE %(tg_schema)s.%(tg_table)s;"
					% {
						'tg_table':tg_table,
						'tg_schema':tg_schema,
						'sc_schema':sc_schema,
						'resource':resource,
						'last_update_date':last_updates_dict[resource]
					}
				)
			except Exception as e:
				conn_target.rollback()
				#scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',3)	# SCAI
				#scai.integrationEnd(db_conf_file, COD_INTEGRATION, COD_COUNTRY, 3)		# SCAI
				print(e)
				# Only database-driver errors carry pgerror; other exceptions
				# must not turn the abort into an AttributeError.
				if hasattr(e, 'pgerror'):
					print(e.pgerror)
				sys.exit("The process aborted with error.")
			else:
				conn_target.commit()
				#scai.processEnd(db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY, tg_table, 'operation_timestamp',1)	# SCAI

				#Enable execution of following processes
				scai_last_execution_status = 1
	finally:
		# Release the cursor and connection on success and on abort alike.
		cur_target.close()
		conn_target.close()

	# If error was solved here, return new status to use in subsequent processes
	return scai_last_execution_status
示例#5
0
def copyHydraTable(db_conf_file,
                   sc_schema,
                   tg_schema,
                   resource,
                   last_update_date,
                   horizontal_name,
                   scai_last_execution_status=1):
    """Reload the Hydra staging table with aggregated ``hydra.web`` events.

    The target table ``stg_<COUNTRY>_<sc_schema>_<resource>`` is truncated
    and refilled with per-day / ad / action-type counts for
    HYDRA_COUNTRY_CODE from ``last_update_date`` onwards, then analyzed.

    Args:
        db_conf_file: Configuration handed to getDatabaseConnection() and
            to the SCAI bookkeeping calls.
        sc_schema: Source schema name; only used to build the target
            table name.
        tg_schema: Target schema name.
        resource: Source resource name; only used to build the target
            table name.
        last_update_date: Lower bound applied to ``server_date_day``.
        horizontal_name: SQL expression inserted verbatim as the
            ``source`` column (the caller supplies any quoting).
        scai_last_execution_status: 1 for a normal run; 3 when re-running
            after an error, in which case the load only runs if this
            process' own SCAI status is 3.

    Returns:
        The (possibly updated) execution status, to be passed to the
        subsequent processes.

    Raises:
        SystemExit: when the load fails, after rolling back and flagging
            the process and the integration with status 3 in SCAI.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()

    try:
        tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
            'resource': resource,
            'sc_schema': sc_schema,
            'COUNTRY': COUNTRY
        }
        scai_process_name = scai.getProcessShortDescription(
            db_conf_file, tg_table)  # SCAI
        if scai_last_execution_status == 3:
            scai_process_status = scai.processCheck(
                db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY,
                scai_last_execution_status)  # SCAI

        # Is normal execution or re-execution starting from the step that was in error
        if (scai_last_execution_status == 1
                or (scai_last_execution_status == 3
                    and scai_process_status == 3)):
            scai.processStart(db_conf_file, scai_process_name,
                              COD_INTEGRATION, COD_COUNTRY)  # SCAI
            print(
                'Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...'
                % {
                    'tg_schema': tg_schema,
                    'tg_table': tg_table,
                    'last_update': last_update_date
                })
            try:
                cur.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                    "INSERT INTO %(tg_schema)s.%(tg_table)s "
                    "SELECT "
                    "	server_date_day, "
                    "	ad_id, "
                    "	action_type, "
                    "	%(horizontal_name)s source, "
                    "	count(*) occurrences, "
                    "	count(distinct session_long) distinct_occurrences "
                    "FROM hydra.web "
                    "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "
                    "AND ad_id is not null "
                    "AND server_date_day >= '%(last_update_date)s' "
                    "GROUP BY server_date_day, ad_id, action_type; "
                    "ANALYZE %(tg_schema)s.%(tg_table)s;"
                    % {
                        'tg_table': tg_table,
                        'tg_schema': tg_schema,
                        'horizontal_name': horizontal_name,
                        'HYDRA_COUNTRY_CODE': HYDRA_COUNTRY_CODE,
                        'last_update_date': last_update_date
                    }
                )
            except Exception as e:
                conn.rollback()
                scai.processEnd(db_conf_file, scai_process_name,
                                COD_INTEGRATION, COD_COUNTRY, tg_table,
                                'server_date_day', 3)  # SCAI
                scai.integrationEnd(db_conf_file, COD_INTEGRATION,
                                    COD_COUNTRY, 3)  # SCAI
                print(e)
                # Only database-driver errors carry pgerror; other
                # exceptions must not turn the abort into an AttributeError.
                if hasattr(e, 'pgerror'):
                    print(e.pgerror)
                sys.exit("The process aborted with error.")
            else:
                conn.commit()
                scai.processEnd(db_conf_file, scai_process_name,
                                COD_INTEGRATION, COD_COUNTRY, tg_table,
                                'server_date_day', 1)

                #Enable execution of following processes
                scai_last_execution_status = 1  # SCAI
    finally:
        # Close the cursor AND the connection (previously the cursor was
        # closed twice and the connection was never released).
        cur.close()
        conn.close()

    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status
示例#6
0
def copyHydraVerticalsTable(db_conf_file,
                            sc_schema,
                            tg_schema,
                            resource,
                            last_update_date,
                            hydra_verticals_names,
                            anlt_verticals_names,
                            scai_last_execution_status=1):
    """Reload the Hydra verticals staging table from ``hydra_verticals.web``.

    The target table ``stg_<COUNTRY>_<sc_schema>_<resource>`` is truncated
    and refilled with per-day / ad / trackname counts for
    HYDRA_COUNTRY_CODE from ``last_update_date`` onwards, then analyzed.
    The ``source`` column comes from a CASE expression that matches
    ``lower(host)`` against each Hydra vertical name and yields the
    analytics vertical name at the same position ('other' otherwise).

    Args:
        db_conf_file: Configuration handed to getDatabaseConnection() and
            to the SCAI bookkeeping calls.
        sc_schema: Source schema name; only used to build the target
            table name.
        tg_schema: Target schema name.
        resource: Source resource name; only used to build the target
            table name.
        last_update_date: Lower bound applied to ``server_date_day``.
        hydra_verticals_names: Sequence of substrings matched against the
            host; must be at least as long as ``anlt_verticals_names``.
        anlt_verticals_names: Sequence of SQL expressions inserted
            verbatim after THEN (the caller supplies any quoting).
        scai_last_execution_status: 1 for a normal run; 3 when re-running
            after an error, in which case the load only runs if this
            process' own SCAI status is 3.

    Returns:
        The (possibly updated) execution status, to be passed to the
        subsequent processes.

    Raises:
        SystemExit: when the load fails, after rolling back and flagging
            the process and the integration with status 3 in SCAI.
    """
    print('Connecting to Yamato...')
    conn = getDatabaseConnection(db_conf_file)
    cur = conn.cursor()

    try:
        tg_table = 'stg_%(COUNTRY)s_%(sc_schema)s_%(resource)s' % {
            'resource': resource,
            'sc_schema': sc_schema,
            'COUNTRY': COUNTRY
        }
        scai_process_name = scai.getProcessShortDescription(
            db_conf_file, tg_table)  # SCAI

        if scai_last_execution_status == 3:
            scai_process_status = scai.processCheck(
                db_conf_file, scai_process_name, COD_INTEGRATION, COD_COUNTRY,
                scai_last_execution_status)  # SCAI

        # Is normal execution or re-execution starting from the step that was in error
        if (scai_last_execution_status == 1
                or (scai_last_execution_status == 3
                    and scai_process_status == 3)):
            scai.processStart(db_conf_file, scai_process_name,
                              COD_INTEGRATION, COD_COUNTRY)  # SCAI
            print(
                'Loading %(tg_schema)s.%(tg_table)s from %(last_update)s...'
                % {
                    'tg_schema': tg_schema,
                    'tg_table': tg_table,
                    'last_update': last_update_date
                })

            # Dynamically build CASE statement according to number of verticals.
            # The doubled %% is not collapsed by the %-formatting below
            # (substituted values are not re-scanned), so it reaches the SQL
            # text as-is.
            when_clauses = [
                " WHEN lower(host) LIKE '%%" + hydra_verticals_names[i]
                + "%%' THEN " + anlt_name
                for i, anlt_name in enumerate(anlt_verticals_names)
            ]
            case_statement = "CASE" + "".join(when_clauses) + " ELSE 'other' END"

            try:
                cur.execute(
                    "TRUNCATE TABLE %(tg_schema)s.%(tg_table)s; "
                    "INSERT INTO %(tg_schema)s.%(tg_table)s "
                    "SELECT "
                    "	server_date_day, "
                    "	ad_id, "
                    "	trackname, "
                    "	%(case_statement)s source, "
                    "	count(*) occurrences, "
                    "	count(distinct session_long) distinct_occurrences "
                    "FROM hydra_verticals.web "
                    "WHERE upper(country_code) = '%(HYDRA_COUNTRY_CODE)s' "
                    "AND ad_id is not null "
                    "AND server_date_day >= '%(last_update_date)s' "
                    "GROUP BY server_date_day, ad_id, trackname, "
                    "	%(case_statement)s; "
                    "ANALYZE %(tg_schema)s.%(tg_table)s;"
                    % {
                        'tg_table': tg_table,
                        'tg_schema': tg_schema,
                        'HYDRA_COUNTRY_CODE': HYDRA_COUNTRY_CODE,
                        'last_update_date': last_update_date,
                        'case_statement': case_statement
                    }
                )
            except Exception as e:
                conn.rollback()
                scai.processEnd(db_conf_file, scai_process_name,
                                COD_INTEGRATION, COD_COUNTRY, tg_table,
                                'server_date_day', 3)  # SCAI
                scai.integrationEnd(db_conf_file, COD_INTEGRATION,
                                    COD_COUNTRY, 3)  # SCAI
                print(e)
                # Only database-driver errors carry pgerror; other
                # exceptions must not turn the abort into an AttributeError.
                if hasattr(e, 'pgerror'):
                    print(e.pgerror)
                sys.exit("The process aborted with error.")
            else:
                conn.commit()
                scai.processEnd(db_conf_file, scai_process_name,
                                COD_INTEGRATION, COD_COUNTRY, tg_table,
                                'server_date_day', 1)  # SCAI

                #Enable execution of following processes
                scai_last_execution_status = 1
    finally:
        # Close the cursor AND the connection (previously the cursor was
        # closed twice and the connection was never released).
        cur.close()
        conn.close()

    # If error was solved here, return new status to use in subsequent processes
    return scai_last_execution_status