Exemplo n.º 1
0
def listen():
    """Forward messages from a Kafka topic to a Snowplow collector.

    All settings are read from environment variables:

    * ``KAFKA_BOOTSTRAP_SRVS``, ``KAFKA_GROUP_ID``, ``KAFKA_SOURCE_TOPIC``
      configure the consumer and the topic it subscribes to.
    * ``SP_COLLECTOR_URI``, ``SP_COLLECTOR_PROTOCOL``, ``SP_COLLECTOR_PORT``,
      ``SP_COLLECTOR_METHOD`` configure the emitter.
    * ``APP_ID`` is used as the tracker's app id and, together with
      ``OPERATOR_ID``, to build the event's schema URI.

    Each consumed message value is parsed as JSON, tracked as a
    self-describing event and flushed straight away. An error while
    handling a single message is printed and the loop moves on to the
    next message.

    Raises:
        KeyError: If one of the environment variables needed to build the
            consumer, emitter or tracker is not set.
    """
    print("attach")

    # Kafka
    consumer = KafkaConsumer(
        bootstrap_servers=os.environ["KAFKA_BOOTSTRAP_SRVS"],
        group_id=os.environ["KAFKA_GROUP_ID"])
    consumer.subscribe([os.environ["KAFKA_SOURCE_TOPIC"]])

    # Snowplow
    emitter = Emitter(
        os.environ["SP_COLLECTOR_URI"],
        protocol=os.environ["SP_COLLECTOR_PROTOCOL"],
        port=int(os.environ["SP_COLLECTOR_PORT"]),
        method=os.environ["SP_COLLECTOR_METHOD"])
    tracker = Tracker(
        emitters=emitter,
        namespace="cf",
        app_id=str(os.environ["APP_ID"]),
        encode_base64=True)

    # The subject carries only fixed placeholder values, so it is built and
    # attached once instead of being recreated for every message.
    subject = Subject()
    subject.set_platform("app")
    subject.set_user_id("??")
    subject.set_lang("??")
    subject.set_ip_address("0.0.0.0")
    subject.set_useragent("??")
    tracker.set_subject(subject)

    for msg in consumer:
        try:
            indata = json.loads(msg.value)

            tracker.track_self_describing_event(SelfDescribingJson(
                "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
                {
                    "data": {
                        "data": indata
                    },
                    "schema": "iglu:" + os.environ["OPERATOR_ID"] + "/"
                              + os.environ["APP_ID"] + "/jsonschema/1-0-0"
                }))

            tracker.flush()
        except Exception as exc:
            # Best effort: one bad message must not stop the consumer loop.
            # A single pre-formatted string prints identically on Python 2
            # and Python 3.
            print("Error: %s" % str(exc))
class SnowplowPlugin(Plugin):
    """Plugin that reports page views and events through a Snowplow tracker.

    The tracker is only created by :meth:`load`; the tracking and flushing
    methods assert that it exists.
    """

    def __init__(self, vendor: str, options: SnowplowOptions) -> None:
        """Remember the schema vendor and the emitter options.

        If ``options.on_failure`` is ``None`` it is replaced by this
        plugin's own :meth:`_on_failure` handler.
        """
        if options.on_failure is None:
            options = options._replace(on_failure=self._on_failure)
        self._vendor = vendor
        self._options: SnowplowOptions = options
        self._tracker: Optional[Tracker] = None
        self._logger: Logger = Logger.NONE

    def id(self) -> str:
        """Return the identifier of this plugin."""
        return 'snowplow'

    def load(self, options: PluginLoadOptions) -> None:
        """Take the logger from ``options`` and create the tracker."""
        self._logger = options.logger
        emitter_kwargs = self._options._asdict()
        self._tracker = Tracker(AsyncEmitter(**emitter_kwargs))

    def page(self, user_id: str, category: Optional[str], name: Optional[str],
             properties: Optional[Properties]) -> None:
        """Track a screen view called ``name`` on behalf of ``user_id``.

        ``category`` and ``properties`` are accepted but not used.
        """
        def send(tracker: Any) -> None:
            tracker.track_screen_view(name=name)

        self._track_as(user_id, send)

    def track(self, user_id: str, event: Event) -> None:
        """Track ``event`` as a self-describing event for ``user_id``.

        The schema URI is built from the vendor, the event id and the event
        version with dots replaced by dashes.
        """
        def send(tracker: Any) -> None:
            schema_version = event.version.replace(".", "-")
            schema = f'iglu:{self._vendor}/{event.id}/jsonschema/{schema_version}'
            tracker.track_self_describing_event(
                SelfDescribingJson(schema, event.properties.to_json()))

        self._track_as(user_id, send)

    def flush(self) -> None:
        """Flush the tracker."""
        tracker = self._tracker
        assert tracker is not None
        tracker.flush()

    def shutdown(self) -> None:
        """Flush pending events on shutdown."""
        self.flush()

    def _track_as(self, user_id: str, send: Any) -> None:
        """Run ``send(tracker)`` with a temporary subject for ``user_id``.

        The tracker's previous subject is put back afterwards, even when
        ``send`` raises.
        """
        tracker = self._tracker
        assert tracker is not None
        scoped_subject = Subject()
        scoped_subject.set_user_id(user_id)
        previous_subject = tracker.subject
        try:
            tracker.set_subject(scoped_subject)
            send(tracker)
        finally:
            tracker.set_subject(previous_subject)

    def _on_failure(self, sent_count: int, unsent: Any) -> None:
        """Log an error through the plugin's logger; both arguments are unused."""
        self._logger.error("Error. Can't send events")
def save_tweet(data):
    """Send one tweet to the Snowplow collector as a self-describing event.

    Collector and application settings are taken from the module-level
    ``args`` object (``sp_collector_uri``, ``sp_collector_protocol``,
    ``sp_collector_port``, ``sp_collector_method``, ``sp_app_id``). A new
    emitter and tracker are created on every call and flushed before
    returning.

    Args:
        data: The tweet. Its ``user_id``, ``lang`` and ``source`` entries
            are read with ``.get`` to fill in the subject, and the whole
            object is embedded as the event payload.
    """
    emitter = Emitter(args.sp_collector_uri,
                      protocol=args.sp_collector_protocol,
                      port=int(args.sp_collector_port),
                      method=args.sp_collector_method)
    tracker = Tracker(emitters=emitter,
                      namespace="cf",
                      app_id=args.sp_app_id,
                      encode_base64=True)

    # NOTE(review): ``str(data.get(...))`` turns a ``None`` result into the
    # literal string "None" -- confirm that is acceptable downstream.
    subject = Subject()
    subject.set_platform("web")
    subject.set_user_id(str(data.get("user_id")))
    subject.set_lang(str(data.get("lang")))
    subject.set_useragent(str(data.get("source")))

    tracker.set_subject(subject)

    tracker.track_self_describing_event(
        SelfDescribingJson(
            "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
            {
                "data": {
                    "data": data
                },
                "schema":
                "iglu:com.rbox24/" + args.sp_app_id + "/jsonschema/1-0-0"
            }))

    tracker.flush()
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("Tweet sent to collector, time: %s" % time.time())
Exemplo n.º 4
0
class SnowplowManager:
    """Build and drive a Snowplow tracker from default and company settings."""

    def __init__(self, config):
        """Load the default settings and remember the company settings.

        The defaults are read from ``src/config.json``, a path relative to
        the current working directory.

        Args:
            config: Company-specific configuration mapping. It is merged
                with the defaults when :meth:`setup_tracker` runs.
        """
        with open('src/config.json') as config_file:
            self.defaultConfig = json.load(config_file)
        self.companyConfig = config
        self.tracker = None
        self.emitter = None
        self.subject = None

    def setup_tracker(self):
        """Create the emitter, subject and tracker from the merged config.

        Returns:
            The newly created tracker, also stored in ``self.tracker``.

        Raises:
            ValueError: If the company config lacks a ``TRACKER_NAME`` or
                an ``APP_ID``. The stored company config is left untouched
                in that case.
        """
        merged = self.setup_config(self.companyConfig)
        if merged is None:
            # Fail with a clear message rather than overwriting the stored
            # config with None and crashing on the first subscript below.
            raise ValueError(
                "Cannot set up tracker: 'TRACKER_NAME' and 'APP_ID' must "
                "be set in the company config")
        self.companyConfig = merged

        self.emitter = Emitter(self.companyConfig["COLLECTOR_HOST"],
                               protocol=self.companyConfig["PROTOCOL"],
                               port=self.companyConfig["PORT"],
                               method=self.companyConfig["EMIT_METHOD"],
                               buffer_size=self.companyConfig["BUFFER_SIZE"])
        self.subject = Subject()
        self.tracker = Tracker(emitters=self.emitter,
                               subject=self.subject,
                               namespace=self.companyConfig["TRACKER_NAME"],
                               app_id=self.companyConfig["APP_ID"],
                               encode_base64=self.companyConfig["ENCODE64"])

        return self.tracker

    def setup_config(self, config):
        """Fill ``config`` with the default settings and return it.

        ``config`` is modified in place: the connection-related keys are
        overwritten with the defaults, then ``COLLECTOR_HOST`` is switched
        to the dev host when ``DEV_ENV`` equals ``True`` and to ``'test'``
        when ``INSPETOR_ENV`` equals ``True`` (the latter wins when both
        are set).

        Args:
            config: Company configuration mapping, or ``None``.

        Returns:
            The updated ``config``, or ``None`` when ``config`` is ``None``
            or its ``TRACKER_NAME`` or ``APP_ID`` is missing or ``None``.
        """
        required_keys = ('TRACKER_NAME', 'APP_ID')
        # A missing key is treated the same as an explicit None.
        if config is None or any(
                config.get(key) is None for key in required_keys):
            return None

        default_keys = (
            'COLLECTOR_HOST', 'PROTOCOL', 'EMIT_METHOD', 'BUFFER_SIZE',
            'DEBUG_MODE', 'ENCODE64', 'PORT',
        )
        for key in default_keys:
            config[key] = self.defaultConfig[key]

        # Equality with True (not plain truthiness) is kept on purpose so
        # that the set of values enabling each flag does not change.
        if config.get("DEV_ENV") == True:  # noqa: E712
            config["COLLECTOR_HOST"] = self.defaultConfig[
                "COLLECTOR_HOST_DEV"]

        if config.get("INSPETOR_ENV") == True:  # noqa: E712
            config["COLLECTOR_HOST"] = 'test'

        return config

    def track_describing_event(self, schema, data, context, action):
        """Track a self-describing event with a single action context.

        Args:
            schema: Schema URI of the event.
            data: Payload of the event.
            context: Schema URI of the context attached to the event.
            action: Value stored under ``'action'`` in that context.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(schema, data), [
                SelfDescribingJson(context, {'action': action}),
            ], self.get_normalized_timestamp())

    def track_non_describing_event(self, schema):
        """Track a serialization-error event for ``schema``.

        The event uses the schema configured under
        ``INGRESSE_SERIALIZATION_ERROR`` in the defaults and records
        ``schema`` as ``intendedSchemaId``.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(
                self.defaultConfig["INGRESSE_SERIALIZATION_ERROR"],
                {'intendedSchemaId': schema}), [],
            self.get_normalized_timestamp())

    def flush(self):
        """Flush the tracker."""
        self.tracker.flush()

    def get_normalized_timestamp(self):
        """Return the current time as whole seconds multiplied by 1000."""
        # NOTE(review): truncating before scaling drops the sub-second
        # part -- confirm this is intended rather than
        # int(time.time() * 1000).
        return int(time.time()) * 1000

    def get_normalized_data(self, data):
        """Return ``data`` with non-ASCII characters removed.

        The string is NFKD-normalized first, so an accented letter keeps
        its base letter and loses only the combining mark.
        """
        return unicodedata.normalize('NFKD',
                                     data).encode('ascii',
                                                  'ignore').decode('utf-8')