def call_snowplow(request_id, json_object):
    """Send one self-describing event to Snowplow and record the outcome.

    An ``AsyncEmitter``/``Tracker`` pair is created the first time a given
    ``env``-``namespace``-``app_id`` combination is seen and is cached in
    the module-level ``e`` and ``t`` dicts; later calls reuse the pair.
    The emitter callbacks insert one row per outcome through ``db_query``
    and ``snowplow_calls_sql``.

    Args:
        request_id: Identifier of the request being forwarded; it is
            stringified before being written to the calls table.
        json_object: Mapping providing ``env``, ``namespace``, ``app_id``,
            ``dvce_created_tstamp`` and ``event_data_json`` (which itself
            holds ``schema``, ``data`` and ``contexts``).

    Raises:
        KeyError: If one of the keys above is missing (assuming
            ``json_object`` is a plain dict).
    """
    # Use the global emitter and tracker dicts
    global e, t

    # NOTE(review): the callbacks below close over *this* call's request_id
    # and json_object, but they are only handed to the emitter when it is
    # first created. A later call that reuses a cached emitter appears to be
    # recorded with the first call's values -- confirm and fix at the caller
    # or cache level.

    # callbacks are documented in
    # - https://github.com/snowplow/snowplow/wiki/Python-Tracker#emitters

    def record_call(status_code, try_number):
        """Insert one row describing this call and log the new row id."""
        row = (
            str(request_id),
            str(status_code),
            str(try_number),
            json_object['env'],
            json_object['namespace'],
            json_object['app_id'],
            json_object['dvce_created_tstamp'],
            json.dumps(json_object['event_data_json']),
        )
        snowplow_id = db_query(snowplow_calls_sql, row)[0]
        log("INFO","snowplow call table insertion PASSED on request_id: {} and snowplow_id: {}.".format(request_id, snowplow_id))

    # callback for passed calls
    def on_success(successfully_sent_count):
        log("INFO","Emitter call PASSED on request_id: {}.".format(request_id))
        # Previous highest try number for this request; MAX() yields None
        # when there is no earlier row, which counts as 0 here.
        previous_try = db_query(
            "SELECT MAX(try_number) FROM caps.snowplow_calls WHERE request_id = %s ;",
            (request_id, ))[0]
        try_number = max(0, 0 if previous_try is None else previous_try) + 1
        log("DEBUG","Try number: {}".format(try_number))
        record_call(200, try_number)

    # callback for failed calls
    failed_try = 0

    def on_failure(successfully_sent_count, failed_events):
        # The counter lives in the enclosing call, so it accumulates for as
        # long as the emitter holding this callback is alive.
        nonlocal failed_try
        failed_try += 1

        # Retries are currently disabled: each failed event is recorded but
        # is not fed back into the emitter, and no back-off delay is applied.
        log("INFO","Emitter call FAILED on request_id {} on try {}. No re-attempt will be made.".format(request_id,failed_try))

        # failed_events should always contain only one event, because
        # AsyncEmitter has a buffer size of 1; one row is written per event.
        for _ in failed_events:
            record_call(400, failed_try)

    tracker_identifier = json_object['env'] + "-" + json_object['namespace'] + "-" + json_object['app_id']
    log("DEBUG","New request with tracker_identifier {}".format(tracker_identifier))

    # logic to switch between SPM and Production Snowplow.
    # TODO: Fix SSL problem so to omit the anonymization proxy, since we connect from a Gov IP, not a personal machine
    sp_endpoint = os.getenv("SP_ENDPOINT_{}".format(json_object['env'].upper()))
    log("DEBUG","Using Snowplow Endpoint {}".format(sp_endpoint))

    # Set up the emitter and tracker. If there is already one for this
    # combination of env, namespace, and app-id, reuse it
    # TODO: add error checking
    if tracker_identifier not in e:
        # defaults to a GET method, defaults to a buffer size of 1; buffer is flushed once full.
        e[tracker_identifier] = AsyncEmitter(sp_endpoint, protocol="https", on_success=on_success, on_failure=on_failure)
    if tracker_identifier not in t:
        t[tracker_identifier] = Tracker(e[tracker_identifier], encode_base64=False, app_id=json_object['app_id'], namespace=json_object['namespace'])

    # Build event JSON
    # TODO: add error checking
    event_data = json_object['event_data_json']
    event = SelfDescribingJson(event_data['schema'], event_data['data'])

    # Build contexts
    # TODO: add error checking
    contexts = [
        SelfDescribingJson(context['schema'], context['data'])
        for context in event_data['contexts']
    ]

    # Send call to Snowplow
    # TODO: add error checking
    t[tracker_identifier].track_self_describing_event(event, contexts, tstamp=json_object['dvce_created_tstamp'])
示例#2
0
                "channel": snowplow_channel,
                "program_id": svc_code,
                "parent_id": pgm_code,
                "program_name": pgm_name,
                "transaction_name": svc_name
            })

        return chooseservice

    @staticmethod
    def get_finish(svc_quantity, accurate_time):
        """Build the ``finish`` self-describing JSON for a completed service.

        ``inaccurate_time`` is set to True whenever ``accurate_time`` is
        anything other than 1; ``count`` carries ``svc_quantity`` unchanged.
        """
        payload = {
            "inaccurate_time": accurate_time != 1,
            "count": svc_quantity,
        }
        return SelfDescribingJson(
            'iglu:ca.bc.gov.cfmspoc/finish/jsonschema/1-0-0', payload)


# Core Snowplow objects; they are only created when the SnowPlow class has
# its call_snowplow_flag switched on.
if SnowPlow.call_snowplow_flag:
    # The subject is created with no arguments (platform is not overridden).
    s = Subject()
    e = AsyncEmitter(
        SnowPlow.sp_endpoint,
        protocol="https",
        on_failure=SnowPlow.failure,
    )
    t = Tracker(
        e,
        app_id=SnowPlow.sp_appid,
        namespace=SnowPlow.sp_namespace,
        encode_base64=False,
    )
示例#3
0
文件: tracking.py 项目: menetrier/dbt
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Level 100 is above logging.CRITICAL (50).
# NOTE(review): presumably sp_logger is the Snowplow tracker's own logger and
# this is meant to silence it -- confirm where sp_logger is imported from.
sp_logger.setLevel(100)

# Collector host/path and the protocol handed to the emitter below.
COLLECTOR_URL = "events.fivetran.com/snowplow/forgiving_ain"
COLLECTOR_PROTOCOL = "https"

# Location of the per-user YAML file: ~/.dbt/.user.yml
COOKIE_PATH = os.path.join(os.path.expanduser('~'), '.dbt/.user.yml')

# URLs of the JSON documents under events/schemas in the dbt repository.
# NOTE(review): presumably used as schema identifiers for tracked events and
# contexts -- the usage is not visible in this part of the file.
INVOCATION_SPEC = "https://raw.githubusercontent.com/analyst-collective/dbt/master/events/schemas/com.fishtownanalytics/invocation_event.json"
PLATFORM_SPEC   = "https://raw.githubusercontent.com/analyst-collective/dbt/master/events/schemas/com.fishtownanalytics/platform_context.json"
RUN_MODEL_SPEC  = "https://raw.githubusercontent.com/analyst-collective/dbt/master/events/schemas/com.fishtownanalytics/run_model_context.json"

# Emitter and tracker are created once, at import time, and shared by the
# module. buffer_size=1 is passed explicitly to the emitter.
emitter = AsyncEmitter(COLLECTOR_URL, protocol=COLLECTOR_PROTOCOL, buffer_size=1)
tracker = Tracker(emitter, namespace="cf", app_id="dbt")

def __write_user():
    """Create a new user record and persist it to ``COOKIE_PATH`` as YAML.

    The directory holding ``COOKIE_PATH`` is created when it is missing.

    Returns:
        dict: The newly written record, ``{"id": <uuid4 as a string>}``.

    Raises:
        OSError: If the directory cannot be created, or the file cannot be
            opened for writing.
    """
    user = {
        "id": str(uuid.uuid4())
    }

    cookie_dir = os.path.dirname(COOKIE_PATH)
    try:
        # Attempt the creation directly instead of checking for existence
        # first: a separate check can race with another process creating
        # the same directory.
        os.makedirs(cookie_dir)
    except OSError:
        # An already-existing directory is fine; anything else is an error.
        if not os.path.isdir(cookie_dir):
            raise

    with open(COOKIE_PATH, "w") as fh:
        yaml.dump(user, fh)

    return user
 def load(self, options: PluginLoadOptions) -> None:
     """Keep the logger from ``options`` and build this plugin's tracker.

     The emitter is configured from ``self._options`` by unpacking its
     ``_asdict()`` result as keyword arguments.
     """
     self._logger = options.logger
     emitter_kwargs = self._options._asdict()
     self._tracker = Tracker(AsyncEmitter(**emitter_kwargs))
示例#5
0
#See https://github.com/snowplow/snowplow/wiki/Python-Tracker
#   and https://github.com/snowplow-proservices/ca.bc.gov-schema-registry
from snowplow_tracker import Subject, Tracker, AsyncEmitter
from snowplow_tracker import SelfDescribingJson
import time
import random

# Set up core Snowplow environment: a default subject, an HTTPS emitter
# pointed at spm.gov.bc.ca, and a tracker for the 'demo' app id.
s = Subject()  #.set_platform("app")
e = AsyncEmitter("spm.gov.bc.ca", protocol="https")
t = Tracker(e, encode_base64=False, app_id='demo')

# Use the current Unix time (whole seconds) as the id of a new "citizen".
# NOTE(review): the original comment said to convert this to a string, but
# the value is kept as an int -- confirm which type the citizen schema expects.
client_id = int(time.time())

# Set some sample values for example events
citizen = SelfDescribingJson('iglu:ca.bc.gov.cfmspoc/citizen/jsonschema/3-0-0',
                             {
                                 "client_id": client_id,
                                 "service_count": 1,
                                 "quick_txn": False
                             })

# Sample office entity with fixed example values.
office = SelfDescribingJson('iglu:ca.bc.gov.cfmspoc/office/jsonschema/1-0-0', {
    "office_id": 8,
    "office_type": "non-reception"
})

agent = SelfDescribingJson('iglu:ca.bc.gov.cfmspoc/agent/jsonschema/2-0-0', {
    "agent_id": 42,
    "role": "CSR",
示例#6
0
    True,
    "DEFAULT_PAGINATOR_INSPECTORS": [
        'vcr_server.inspector.PageNumberPaginatorInspectorClass',
    ],
}

# Short credential-type aliases mapped to full credential type names.
# NOTE(review): "business_number" resolves to the same value as
# "relationship" -- confirm this is intended.
CRED_TYPE_SYNONYMS = {
    "registration": "registration.registries.ca",
    "relationship": "relationship.registries.ca",
    "business_number": "relationship.registries.ca",
}

# Set up core Snowplow environment for api tracking.
# Each value can be overridden through the environment variable named in the
# os.getenv call; the second argument is the fallback used when it is unset.
SP_APP_ID = os.getenv("SP_TRACKING_APP_ID", "orgbook_api_local_dev")
SP_EMITTER = AsyncEmitter(os.getenv("SP_TRACKING_EMITTER",
                                    "spm.apps.gov.bc.ca"),
                          protocol=os.getenv("SP_TRACKING_EMITTER_PROTOCOL",
                                             "https"))
# The emitter and tracker are created once, when this settings module is
# imported.
SP_TRACKER = Tracker(SP_EMITTER, encode_base64=False, app_id=SP_APP_ID)

# Named URL patterns used for login and logout.
LOGIN_URL = "rest_framework:login"
LOGOUT_URL = "rest_framework:logout"

# Internationalization
# https://docs.djangoproject.com/en/1.9/topics/i18n/

LANGUAGE_CODE = "en-us"

TIME_ZONE = "UTC"

USE_I18N = True
def call_snowplow(request_id, json_object):
    '''Send one self-describing event to Snowplow and record the outcome.

    A dedicated ``AsyncEmitter``/``Tracker`` pair is built on every call.
    The emitter callbacks insert one row per outcome through
    ``single_response_query`` and ``snowplow_calls_sql``.

    Args:
        request_id: Identifier of the request being forwarded; it is
            stringified before being written to the calls table.
        json_object: Mapping providing ``env``, ``namespace``, ``app_id``,
            ``dvce_created_tstamp`` and ``event_data_json`` (which itself
            holds ``schema``, ``data`` and ``contexts``).

    Raises:
        KeyError: If one of the keys above is missing (assuming
            ``json_object`` is a plain dict).
    '''
    # Debugging request_id to see if it's being evaluated by the callbacks
    logger.info("Request ID on call_snowplow function: %s", request_id)

    def callback_log_inscope():
        logger.info("callback_log_inscope has Request ID: %s", request_id)

    def record_call(status_code, try_number):
        '''Insert one row describing this call and log the new row id.'''
        row = (str(request_id), str(status_code), str(try_number),
               json_object['env'], json_object['namespace'],
               json_object['app_id'],
               json_object['dvce_created_tstamp'],
               json.dumps(json_object['event_data_json']))
        snowplow_id = single_response_query(snowplow_calls_sql, row)[0]
        logger.info(
            "snowplow call table insertion PASSED on "
            "request_id: %s and snowplow_id: %s.", request_id, snowplow_id)

    # callbacks are documented in
    # - https://github.com/snowplow/snowplow/wiki/Python-Tracker#emitters

    # callback for passed calls
    def on_success(successfully_sent_count):
        '''Callback executed when an emitter is flushed successfully'''
        logger.info('\'on_success\' callback with %s successful events',
                    successfully_sent_count)
        callback_log_inscope()
        logger.info("Emitter call PASSED on request_id: %s.", request_id)
        # Previous highest try number for this request; MAX() yields None
        # when there is no earlier row, which counts as 0 here.
        max_try_number_query = ("SELECT MAX(try_number) "
                                "FROM caps.snowplow_calls "
                                "WHERE request_id = %s ;")
        previous_try = single_response_query(max_try_number_query,
                                             (request_id, ))[0]
        try_number = max(0, 0 if previous_try is None else previous_try) + 1
        logger.debug("Try number: %s", try_number)
        record_call(200, try_number)

    # callback for failed calls
    failed_try = 0

    def on_failure(successfully_sent_count, failed_events):
        '''Callback executed when an emitter flush results in any failures'''
        logger.warning(
            '\'on_failure\' callback: %s events successfully '
            'emitted, %s events returned by emitter with an error '
            'response', successfully_sent_count, len(failed_events))
        # increment the failed try
        nonlocal failed_try
        failed_try += 1

        logger.info(
            'Emitter call FAILED on request_id %s on try %s. '
            'No re-attempt will be made.', request_id, failed_try)

        # failed_events should always contain only one event,
        # because AsyncEmitter has a buffer size of 1.
        # Each failure is recorded; the event is not re-sent.
        for event in failed_events:
            logger.warning('event failure: %s', event)
            record_call(400, failed_try)

    tracker_identifier = "{}-{}-{}".format(json_object['env'],
                                           json_object['namespace'],
                                           json_object['app_id'])
    logger.debug("New request with tracker_identifier %s", tracker_identifier)

    # logic to switch between SPM and Production Snowplow.
    sp_route = os.getenv("SP_ENDPOINT_{}".format(json_object['env'].upper()))
    logger.debug("Using Snowplow Endpoint %s", sp_route)

    # The module-level dicts of emitters/trackers keyed by
    # tracker_identifier are temporarily bypassed: a fresh pair is built for
    # every call, so the callbacks handed to the emitter are the ones that
    # close over this call's request_id and json_object.
    # NOTE(review): confirm that a per-call AsyncEmitter releases whatever
    # worker resources it allocates.
    # TODO: add error checking
    emitter = AsyncEmitter(sp_route,
                           protocol="https",
                           on_success=on_success,
                           on_failure=on_failure)
    tracker = Tracker(emitter,
                      encode_base64=False,
                      app_id=json_object['app_id'],
                      namespace=json_object['namespace'])

    # Build event JSON
    # TODO: add error checking
    event_data = json_object['event_data_json']
    event = SelfDescribingJson(event_data['schema'], event_data['data'])

    # Build contexts
    # TODO: add error checking
    contexts = [
        SelfDescribingJson(context['schema'], context['data'])
        for context in event_data['contexts']
    ]

    # Send call to Snowplow
    # TODO: add error checking
    tracker.track_self_describing_event(
        event, contexts, tstamp=json_object['dvce_created_tstamp'])