Exemplo n.º 1
0
def listen():
    """Forward every message from a Kafka topic to a Snowplow collector.

    All configuration is read from environment variables:
    ``KAFKA_BOOTSTRAP_SRVS``, ``KAFKA_GROUP_ID``, ``KAFKA_SOURCE_TOPIC``,
    ``SP_COLLECTOR_URI``, ``SP_COLLECTOR_PROTOCOL``, ``SP_COLLECTOR_PORT``,
    ``SP_COLLECTOR_METHOD``, ``APP_ID`` and ``OPERATOR_ID``.

    Each message value is parsed as JSON, wrapped in a self-describing
    event and flushed immediately. The function iterates over the consumer
    and only returns if that iteration ends. Any exception raised while
    handling a single message is printed and the loop moves on to the next
    message.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("attach")

    # Kafka
    consumer = KafkaConsumer(
        bootstrap_servers=os.environ["KAFKA_BOOTSTRAP_SRVS"],
        group_id=os.environ["KAFKA_GROUP_ID"])
    consumer.subscribe([os.environ["KAFKA_SOURCE_TOPIC"]])

    # Snowplow
    e = Emitter(os.environ["SP_COLLECTOR_URI"],
                protocol=os.environ["SP_COLLECTOR_PROTOCOL"],
                port=int(os.environ["SP_COLLECTOR_PORT"]),
                method=os.environ["SP_COLLECTOR_METHOD"])
    t = Tracker(emitters=e,
                namespace="cf",
                app_id=str(os.environ["APP_ID"]),
                encode_base64=True)

    # The subject carries only placeholder values that never change, so it
    # is built and attached once instead of once per message.
    s1 = Subject()
    s1.set_platform("app")
    s1.set_user_id("??")
    s1.set_lang("??")
    s1.set_ip_address("0.0.0.0")
    s1.set_useragent("??")
    t.set_subject(s1)

    for msg in consumer:
        try:
            indata = json.loads(msg.value)

            # The environment lookups stay inside the try so that a missing
            # OPERATOR_ID/APP_ID is reported per message rather than
            # aborting the consumer loop.
            t.track_self_describing_event(SelfDescribingJson(
                "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
                {
                    "data": {
                        "data": indata
                    },
                    "schema": "iglu:" + os.environ["OPERATOR_ID"] + "/" +
                              os.environ["APP_ID"] + "/jsonschema/1-0-0"
                }))

            t.flush()
        except Exception as error:
            # Deliberately broad: one bad message must not stop the consumer.
            print("Error: " + str(error))
class SnowplowPlugin(Plugin):
    """Plugin that forwards ``page`` and ``track`` calls to a Snowplow tracker.

    The tracker does not exist until :meth:`load` has run; ``page``,
    ``track`` and ``flush`` assert that it is present.
    """

    def __init__(self, vendor: str, options: SnowplowOptions) -> None:
        """Store the Iglu vendor and the emitter options.

        When ``options.on_failure`` is ``None`` the plugin's own
        ``_on_failure`` handler is substituted.
        """
        self._vendor = vendor
        effective_options = options
        if effective_options.on_failure is None:
            effective_options = effective_options._replace(
                on_failure=self._on_failure)
        self._options: SnowplowOptions = effective_options
        self._tracker: Optional[Tracker] = None
        self._logger: Logger = Logger.NONE

    def id(self) -> str:
        """Return the identifier of this plugin."""
        return 'snowplow'

    def load(self, options: PluginLoadOptions) -> None:
        """Adopt the supplied logger and create the tracker.

        The stored options are expanded into keyword arguments for
        ``AsyncEmitter``.
        """
        self._logger = options.logger
        emitter_kwargs = self._options._asdict()
        self._tracker = Tracker(AsyncEmitter(**emitter_kwargs))

    @staticmethod
    def _make_subject(user_id: str) -> Subject:
        """Return a fresh ``Subject`` carrying only ``user_id``."""
        subject = Subject()
        subject.set_user_id(user_id)
        return subject

    def page(self, user_id: str, category: Optional[str], name: Optional[str],
             properties: Optional[Properties]) -> None:
        """Track a screen view named ``name`` on behalf of ``user_id``.

        ``category`` and ``properties`` are accepted but not used. The
        tracker's previous subject is restored afterwards, even when
        tracking raises.
        """
        tracker = self._tracker
        assert tracker is not None
        user_subject = self._make_subject(user_id)
        saved_subject = tracker.subject
        try:
            tracker.set_subject(user_subject)
            tracker.track_screen_view(name=name)
        finally:
            tracker.set_subject(saved_subject)

    def track(self, user_id: str, event: Event) -> None:
        """Track ``event`` as a self-describing event on behalf of ``user_id``.

        The Iglu schema URI is built from the vendor, the event id and the
        event version with dots replaced by dashes. The tracker's previous
        subject is restored afterwards, even when tracking raises.
        """
        tracker = self._tracker
        assert tracker is not None
        user_subject = self._make_subject(user_id)
        saved_subject = tracker.subject
        try:
            tracker.set_subject(user_subject)
            schema_version = event.version.replace(".", "-")
            payload = SelfDescribingJson(
                f'iglu:{self._vendor}/{event.id}/jsonschema/{schema_version}',
                event.properties.to_json())
            tracker.track_self_describing_event(payload)
        finally:
            tracker.set_subject(saved_subject)

    def flush(self) -> None:
        """Flush the tracker."""
        tracker = self._tracker
        assert tracker is not None
        tracker.flush()

    def shutdown(self) -> None:
        """Flush the tracker; no other teardown is performed."""
        self.flush()

    def _on_failure(self, sent_count: int, unsent: Any) -> None:
        """Default failure handler: log a fixed error message.

        ``sent_count`` and ``unsent`` are not included in the log output.
        """
        self._logger.error("Error. Can't send events")
def save_tweet(data):
    """Send one tweet to the Snowplow collector as a self-describing event.

    A new ``Emitter`` and ``Tracker`` are created on every call from the
    module-level ``args`` (``sp_collector_uri``, ``sp_collector_protocol``,
    ``sp_collector_port``, ``sp_collector_method``, ``sp_app_id``), and the
    tracker is flushed before returning.

    Args:
        data: Mapping describing the tweet. ``user_id``, ``lang`` and
            ``source`` are read with ``.get`` to populate the subject, and
            the whole mapping is sent as the event payload.
    """
    e = Emitter(args.sp_collector_uri,
                protocol=args.sp_collector_protocol,
                port=int(args.sp_collector_port),
                method=args.sp_collector_method)
    t = Tracker(emitters=e,
                namespace="cf",
                app_id=args.sp_app_id,
                encode_base64=True)

    # NOTE(review): str() turns a missing field into the literal string
    # "None" -- confirm that is what downstream consumers expect.
    s1 = Subject()
    s1.set_platform("web")
    s1.set_user_id(str(data.get("user_id")))
    s1.set_lang(str(data.get("lang")))
    # The subject's IP address is not set; the call below is disabled.
    #s1.set_ip_address(str(data.get("i_ip")))
    s1.set_useragent(str(data.get("source")))

    t.set_subject(s1)

    t.track_self_describing_event(
        SelfDescribingJson(
            "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
            {
                "data": {
                    "data": data
                },
                "schema":
                "iglu:com.rbox24/" + args.sp_app_id + "/jsonschema/1-0-0"
            }))

    t.flush()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Tweet sent to collector, time: " + str(time.time()))
Exemplo n.º 4
0
class SnowplowManager:
    """Create and drive a Snowplow tracker from default and company settings.

    Default settings are loaded from ``src/config.json``, resolved against
    the current working directory. The company settings are supplied by the
    caller.
    """

    def __init__(self, config):
        """
        Initialize service

        Args:
            config: Company configuration mapping. It must provide non-null
                ``TRACKER_NAME`` and ``APP_ID`` for ``setup_tracker`` to
                succeed.
        """
        with open('src/config.json') as config_file:
            self.defaultConfig = json.load(config_file)
        self.companyConfig = config
        self.tracker = None
        self.emitter = None
        self.subject = None

    def setup_tracker(self):
        """Setup an instance of a tracker

        Returns:
            The newly created ``Tracker``, also stored on ``self.tracker``.

        Raises:
            ValueError: If the company config lacks a non-null
                ``TRACKER_NAME`` or ``APP_ID``. ``self.companyConfig`` is
                left untouched in that case.
        """
        merged_config = self.setup_config(self.companyConfig)
        if merged_config is None:
            # Fail with a clear message instead of overwriting
            # self.companyConfig with None and crashing on the first
            # subscript below.
            raise ValueError(
                "Cannot set up tracker: company config must define non-null "
                "'TRACKER_NAME' and 'APP_ID'")
        self.companyConfig = merged_config

        self.emitter = Emitter(self.companyConfig["COLLECTOR_HOST"],
                               protocol=self.companyConfig["PROTOCOL"],
                               port=self.companyConfig["PORT"],
                               method=self.companyConfig["EMIT_METHOD"],
                               buffer_size=self.companyConfig["BUFFER_SIZE"])
        self.subject = Subject()
        self.tracker = Tracker(emitters=self.emitter,
                               subject=self.subject,
                               namespace=self.companyConfig["TRACKER_NAME"],
                               app_id=self.companyConfig["APP_ID"],
                               encode_base64=self.companyConfig["ENCODE64"])

        return self.tracker

    def setup_config(self, config):
        """Setup config with company and default config

        The default values for the collector/emitter keys are written into
        ``config`` in place, replacing any values already there.
        ``DEV_ENV == True`` switches the collector host to the default
        ``COLLECTOR_HOST_DEV``; ``INSPETOR_ENV == True`` then switches it to
        ``'test'``.

        Returns:
            The same ``config`` object, updated, or ``None`` when ``config``
            is ``None`` or ``TRACKER_NAME`` / ``APP_ID`` is missing or
            ``None``.
        """
        if config is None:
            return None
        if config.get('TRACKER_NAME') is None or \
                config.get('APP_ID') is None:
            return None

        keys = [
            'COLLECTOR_HOST', 'PROTOCOL', 'EMIT_METHOD', 'BUFFER_SIZE',
            'DEBUG_MODE', 'ENCODE64', 'PORT'
        ]

        for key in keys:
            config[key] = self.defaultConfig[key]

        # "== True" is kept on purpose: only True (or a value equal to it)
        # enables these switches, not every truthy value.
        if config.get("DEV_ENV") == True:  # noqa: E712
            config["COLLECTOR_HOST"] = self.defaultConfig[
                "COLLECTOR_HOST_DEV"]

        if config.get("INSPETOR_ENV") == True:  # noqa: E712
            config["COLLECTOR_HOST"] = 'test'

        return config

    def track_describing_event(self, schema, data, context, action):
        """ Track describing snowplow event

        Sends ``data`` under ``schema`` with one context built from
        ``context`` and ``{'action': action}``, stamped with
        ``get_normalized_timestamp()``.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(schema, data), [
                SelfDescribingJson(context, {'action': action}),
            ], self.get_normalized_timestamp())

    def track_non_describing_event(self, schema):
        """ Track non describing snowplow event

        Sends an event under the default ``INGRESSE_SERIALIZATION_ERROR``
        schema whose payload records ``schema`` as ``intendedSchemaId``,
        with no contexts.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(
                self.defaultConfig["INGRESSE_SERIALIZATION_ERROR"],
                {'intendedSchemaId': schema}), [],
            self.get_normalized_timestamp())

    def flush(self):
        """
        Flush trackers
        """
        self.tracker.flush()

    def get_normalized_timestamp(self):
        """
        Get correct timestamp

        Returns the current time in milliseconds. The value is truncated to
        whole seconds before scaling, so it is always a multiple of 1000.
        """
        return int(time.time()) * 1000

    def get_normalized_data(self, data):
        """
        Format string to replace non-ascii characters

        ``data`` is NFKD-normalized and every character that cannot be
        encoded as ASCII is then dropped.
        """
        return unicodedata.normalize('NFKD',
                                     data).encode('ascii',
                                                  'ignore').decode('utf-8')
Exemplo n.º 5
0
# The addcitizen event has no parameters of its own, so its payload is an
# empty JSON object ({}).
addcitizen = SelfDescribingJson(
    'iglu:ca.bc.gov.cfmspoc/addcitizen/jsonschema/1-0-0', {})

# chooseservice carries its parameters as a JSON object.
chooseservice = SelfDescribingJson(
    'iglu:ca.bc.gov.cfmspoc/chooseservice/jsonschema/2-0-0', {
        "channel": "in-person",
        "program_id": 100,
        "parent_id": 0,
        "program_name": "example program name",
        "transaction_name": "example transaction name"
    })

# beginservice, like addcitizen, has an empty payload.
beginservice = SelfDescribingJson(
    'iglu:ca.bc.gov.cfmspoc/beginservice/jsonschema/1-0-0', {})

# --- Trigger the events in sequence, each after a random wait.
# Every entry is (event, contexts, fixed wait in seconds, maximum extra
# random wait in seconds). Note that beginservice is sent without the
# agent context.
for _event, _contexts, _base_wait, _max_extra_wait in (
        (addcitizen, [citizen, office, agent], 3, 11),
        (chooseservice, [citizen, office, agent], 2, 4),
        (beginservice, [citizen, office], 4, 6),
):
    time.sleep(random.randint(0, _max_extra_wait) + _base_wait)
    t.track_self_describing_event(_event, _contexts)
def call_snowplow(request_id, json_object):
    '''Send the event described by json_object to Snowplow.

    A new AsyncEmitter and Tracker are created for every call. The
    emitter's success and failure callbacks each insert a row through
    single_response_query(snowplow_calls_sql, ...), with status "200" or
    "400" respectively, and log the returned id.

    request_id  -- identifier written to the log lines and the inserted rows
    json_object -- mapping read for 'env', 'namespace', 'app_id',
                   'dvce_created_tstamp' and 'event_data_json'; the latter
                   must hold 'schema', 'data' and 'contexts'
    '''
    # Debugging request_id to see if it's being evaluated by the callbacks
    logger.info("Request ID on call_snowplow function: %s", request_id)

    # Global emitter and tracker dicts; only needed by the reuse logic that
    # is currently commented out further down.
    global e, t

    def callback_log_inscope():
        logger.info("callback_log_inscope has Request ID: %s", request_id)

    def record_call(status_code, try_number):
        '''Insert one row describing this call and log the returned id.'''
        row = (str(request_id), str(status_code), str(try_number),
               json_object['env'], json_object['namespace'],
               json_object['app_id'],
               json_object['dvce_created_tstamp'],
               json.dumps(json_object['event_data_json']))
        snowplow_id = single_response_query(snowplow_calls_sql, row)[0]
        logger.info(
            "snowplow call table insertion PASSED on "
            "request_id: %s and snowplow_id: %s.", request_id, snowplow_id)

    # callbacks are documented in
    # - https://github.com/snowplow/snowplow/wiki/Python-Tracker#emitters

    def on_success(successfully_sent_count):
        '''Callback executed when an emitter is flushed successfully'''
        logger.info("'on_success' callback with %s successful events",
                    successfully_sent_count)
        callback_log_inscope()
        logger.info("Emitter call PASSED on request_id: %s.", request_id)
        # The try number is one more than the highest recorded so far;
        # "no rows yet" (None) counts as zero.
        max_try_number_query = ("SELECT MAX(try_number) "
                                "FROM caps.snowplow_calls "
                                "WHERE request_id = %s ;")
        previous_max = single_response_query(max_try_number_query,
                                             (request_id, ))[0]
        if previous_max is None:
            try_number = 1
        else:
            try_number = max(0, previous_max) + 1
        logger.debug("Try number: %s", try_number)
        record_call(200, try_number)

    # Failures seen by this call's emitter; shared with on_failure below.
    failed_try = 0

    def on_failure(successfully_sent_count, failed_events):
        '''Callback executed when an emitter flush results in any failures'''
        nonlocal failed_try
        logger.warning(
            "'on_failure' callback: %s events successfully "
            "emitted, %s events returned by emitter with an error "
            "response", successfully_sent_count, len(failed_events))
        failed_try += 1

        logger.info(
            'Emitter call FAILED on request_id %s on try %s. '
            'No re-attempt will be made.', request_id, failed_try)

        # The original author expects exactly one failed event here, on the
        # assumption that the AsyncEmitter buffer size is 1 -- TODO confirm.
        # Every failed event is recorded; none is handed back to the emitter.
        for failed_event in failed_events:
            logger.warning('event failure: %s', failed_event)
            record_call(400, failed_try)

    tracker_identifier = "{}-{}-{}".format(json_object['env'],
                                           json_object['namespace'],
                                           json_object['app_id'])
    logger.debug("New request with tracker_identifier %s", tracker_identifier)

    # logic to switch between SPM and Production Snowplow.
    endpoint_variable = "SP_ENDPOINT_{}".format(json_object['env'].upper())
    sp_route = os.getenv(endpoint_variable)
    logger.debug("Using Snowplow Endpoint %s", sp_route)

    # Set up the emitter and tracker. If there is already one for this
    # combination of env, namespace, and app-id, reuse it
    # TODO: add error checking
    # TEMP COMMENTED OUT TO AVOID USING THE GLOBAL DICT OF EMITTERS/TRACKERS
    # if tracker_identifier not in e:
    #     e[tracker_identifier] = AsyncEmitter(
    #         sp_route,
    #         protocol="https",
    #         on_success=on_success,
    #         on_failure=on_failure)
    #
    # if tracker_identifier not in t:
    #     t[tracker_identifier] = Tracker(
    #         e[tracker_identifier],
    #         encode_base64=False,
    #         app_id=json_object['app_id'],
    #         namespace=json_object['namespace'])

    emitter = AsyncEmitter(sp_route,
                           protocol="https",
                           on_success=on_success,
                           on_failure=on_failure)
    tracker = Tracker(emitter,
                      encode_base64=False,
                      app_id=json_object['app_id'],
                      namespace=json_object['namespace'])

    # Build event JSON
    # TODO: add error checking
    event_data = json_object['event_data_json']
    event = SelfDescribingJson(event_data['schema'], event_data['data'])

    # Build contexts
    # TODO: add error checking
    contexts = [
        SelfDescribingJson(context['schema'], context['data'])
        for context in json_object['event_data_json']['contexts']
    ]

    # Send call to Snowplow
    # TODO: add error checking
    # TEMP COMMENTED OUT TO AVOID USING THE GLOBAL DICT OF EMITTERS/TRACKERS
    # t[tracker_identifier].track_self_describing_event(
    #     event, contexts, tstamp=json_object['dvce_created_tstamp'])

    tracker.track_self_describing_event(
        event, contexts, tstamp=json_object['dvce_created_tstamp'])