def listen():
    """Forward every message of a Kafka topic to a Snowplow collector.

    All settings are read from environment variables before the consume
    loop starts, so a missing variable raises ``KeyError`` immediately:

    * ``KAFKA_BOOTSTRAP_SRVS``, ``KAFKA_GROUP_ID``, ``KAFKA_SOURCE_TOPIC``
      configure the consumer.
    * ``SP_COLLECTOR_URI``, ``SP_COLLECTOR_PROTOCOL``, ``SP_COLLECTOR_PORT``
      and ``SP_COLLECTOR_METHOD`` configure the emitter.
    * ``APP_ID`` is the tracker's app id; together with ``OPERATOR_ID`` it
      also forms the inner schema URI
      ``iglu:<OPERATOR_ID>/<APP_ID>/jsonschema/1-0-0``.

    Each message value is parsed as JSON, tracked as a self-describing
    event and flushed right away.  A failure while handling one message is
    printed and the loop moves on to the next message.
    """
    print("attach")

    # Kafka
    consumer = KafkaConsumer(
        bootstrap_servers=os.environ["KAFKA_BOOTSTRAP_SRVS"],
        group_id=os.environ["KAFKA_GROUP_ID"],
    )
    consumer.subscribe([os.environ["KAFKA_SOURCE_TOPIC"]])

    # Snowplow
    emitter = Emitter(
        os.environ["SP_COLLECTOR_URI"],
        protocol=os.environ["SP_COLLECTOR_PROTOCOL"],
        port=int(os.environ["SP_COLLECTOR_PORT"]),
        method=os.environ["SP_COLLECTOR_METHOD"],
    )
    tracker = Tracker(
        emitters=emitter,
        namespace="cf",
        app_id=str(os.environ["APP_ID"]),
        encode_base64=True,
    )

    # The subject carries only fixed placeholder values, so it is built and
    # attached once instead of once per message.
    subject = Subject()
    subject.set_platform("app")
    subject.set_user_id("??")
    subject.set_lang("??")
    subject.set_ip_address("0.0.0.0")
    subject.set_useragent("??")
    tracker.set_subject(subject)

    # Loop-invariant; resolving it here also makes a missing OPERATOR_ID fail
    # at start-up rather than being reported for every consumed message.
    inner_schema = (
        "iglu:" + os.environ["OPERATOR_ID"] + "/" + os.environ["APP_ID"]
        + "/jsonschema/1-0-0"
    )

    for msg in consumer:
        try:
            indata = json.loads(msg.value)
            tracker.track_self_describing_event(SelfDescribingJson(
                "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
                {
                    "data": {
                        "data": indata
                    },
                    "schema": inner_schema
                }))
            tracker.flush()
        except Exception as err:
            # Deliberately broad: one bad message must not stop the consumer.
            # Single-argument print keeps the output identical on Python 2
            # and Python 3.
            print("Error: " + str(err))
class SnowplowPlugin(Plugin):
    """Plugin that reports page views and events through a Snowplow tracker.

    The tracker is created in :meth:`load`; calling :meth:`page`,
    :meth:`track` or :meth:`flush` before that fails the
    ``self._tracker is not None`` assertion.
    """

    def __init__(self, vendor: str, options: SnowplowOptions) -> None:
        """Store the vendor and options.

        When ``options.on_failure`` is ``None`` the plugin's own
        ``_on_failure`` handler is substituted via ``options._replace``.
        """
        effective = options
        if effective.on_failure is None:
            effective = effective._replace(on_failure=self._on_failure)

        self._vendor = vendor
        self._options: SnowplowOptions = effective
        self._tracker: Optional[Tracker] = None
        self._logger: Logger = Logger.NONE

    def id(self) -> str:
        """Return the plugin identifier, ``'snowplow'``."""
        return 'snowplow'

    def load(self, options: PluginLoadOptions) -> None:
        """Take the logger from ``options`` and build the tracker.

        The stored Snowplow options are expanded as keyword arguments of
        ``AsyncEmitter``; the resulting emitter backs a new ``Tracker``.
        """
        self._logger = options.logger
        self._tracker = Tracker(AsyncEmitter(**self._options._asdict()))

    def _send_as(self, user_id: str, send: Any) -> None:
        """Run ``send(tracker)`` with a subject carrying ``user_id``.

        The tracker's previous subject is put back afterwards, even when
        ``send`` raises.
        """
        tracker = self._tracker
        assert tracker is not None

        temporary = Subject()
        temporary.set_user_id(user_id)

        previous = tracker.subject
        try:
            tracker.set_subject(temporary)
            send(tracker)
        finally:
            tracker.set_subject(previous)

    def page(self, user_id: str, category: Optional[str], name: Optional[str],
             properties: Optional[Properties]) -> None:
        """Track a screen view called ``name`` on behalf of ``user_id``.

        ``category`` and ``properties`` are accepted but not used.
        """
        self._send_as(
            user_id,
            lambda tracker: tracker.track_screen_view(name=name),
        )

    def track(self, user_id: str, event: Event) -> None:
        """Track ``event`` as a self-describing event on behalf of ``user_id``.

        The schema URI is ``iglu:<vendor>/<event.id>/jsonschema/<version>``
        where the dots of ``event.version`` are replaced by dashes.
        """
        def emit(tracker):
            dashed_version = event.version.replace(".", "-")
            schema_uri = f'iglu:{self._vendor}/{event.id}/jsonschema/{dashed_version}'
            payload = SelfDescribingJson(schema_uri, event.properties.to_json())
            tracker.track_self_describing_event(payload)

        self._send_as(user_id, emit)

    def flush(self) -> None:
        """Flush the tracker."""
        tracker = self._tracker
        assert tracker is not None
        tracker.flush()

    def shutdown(self) -> None:
        """Flush on shutdown; nothing else is done here."""
        self.flush()

    def _on_failure(self, sent_count: int, unsent: Any) -> None:
        """Default failure handler: log a fixed error message.

        ``sent_count`` and ``unsent`` are not included in the message.
        """
        self._logger.error("Error. Can't send events")
def save_tweet(data):
    """Send one tweet to the Snowplow collector as a self-describing event.

    A new emitter and tracker are built on every call from the module-level
    ``args`` object (``sp_collector_uri``, ``sp_collector_protocol``,
    ``sp_collector_port``, ``sp_collector_method``, ``sp_app_id``).

    Args:
        data: Mapping describing the tweet.  Its ``user_id``, ``lang`` and
            ``source`` entries (read with ``.get`` and converted with
            ``str``) populate the subject; the whole mapping is sent as the
            event payload.
    """
    emitter = Emitter(
        args.sp_collector_uri,
        protocol=args.sp_collector_protocol,
        port=int(args.sp_collector_port),
        method=args.sp_collector_method,
    )
    tracker = Tracker(
        emitters=emitter,
        namespace="cf",
        app_id=args.sp_app_id,
        encode_base64=True,
    )

    subject = Subject()
    subject.set_platform("web")
    subject.set_user_id(str(data.get("user_id")))
    subject.set_lang(str(data.get("lang")))
    # No IP address is attached to the subject.
    subject.set_useragent(str(data.get("source")))
    tracker.set_subject(subject)

    tracker.track_self_describing_event(
        SelfDescribingJson(
            "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
            {
                "data": {
                    "data": data
                },
                "schema": "iglu:com.rbox24/" + args.sp_app_id + "/jsonschema/1-0-0"
            }))
    tracker.flush()

    # Single-argument print works on Python 2 and 3; ``%s`` formats the float
    # with ``str()``, as the print statement did.
    print("Tweet sent to collector, time: %s" % time.time())
class SnowplowManager:
    """Create and drive a Snowplow tracker from default and company settings.

    Default settings are loaded from ``src/config.json`` (relative to the
    current working directory); the company settings are the mapping passed
    to the constructor.
    """

    # Settings that setup_config() always copies from the default config,
    # replacing whatever the company config held under the same key.
    _DEFAULTED_KEYS = (
        'COLLECTOR_HOST',
        'PROTOCOL',
        'EMIT_METHOD',
        'BUFFER_SIZE',
        'DEBUG_MODE',
        'ENCODE64',
        'PORT',
    )

    def __init__(self, config):
        """Load the default config file and remember the company config.

        Args:
            config: Company settings; must provide ``TRACKER_NAME`` and
                ``APP_ID`` by the time :meth:`setup_tracker` is called.
        """
        with open('src/config.json') as config_file:
            self.defaultConfig = json.load(config_file)
        self.companyConfig = config
        self.tracker = None
        self.emitter = None
        self.subject = None

    def setup_tracker(self):
        """Build the emitter, subject and tracker and return the tracker.

        Raises:
            ValueError: if ``TRACKER_NAME`` or ``APP_ID`` is ``None`` in the
                company config.  The stored company config is left untouched
                in that case.
        """
        resolved = self.setup_config(self.companyConfig)
        if resolved is None:
            # Previously the stored config was replaced by None and the next
            # line failed with "'NoneType' object is not subscriptable".
            raise ValueError(
                "Cannot set up Snowplow tracker: 'TRACKER_NAME' and 'APP_ID' "
                "must both be set in the company config")
        self.companyConfig = resolved

        self.emitter = Emitter(
            resolved["COLLECTOR_HOST"],
            protocol=resolved["PROTOCOL"],
            port=resolved["PORT"],
            method=resolved["EMIT_METHOD"],
            buffer_size=resolved["BUFFER_SIZE"])
        self.subject = Subject()
        self.tracker = Tracker(
            emitters=self.emitter,
            subject=self.subject,
            namespace=resolved["TRACKER_NAME"],
            app_id=resolved["APP_ID"],
            encode_base64=resolved["ENCODE64"])
        return self.tracker

    def setup_config(self, config):
        """Fill ``config`` with default settings and return it.

        ``config`` is modified in place.  The keys in ``_DEFAULTED_KEYS`` are
        copied from the default config.  ``COLLECTOR_HOST`` is then replaced
        by the default ``COLLECTOR_HOST_DEV`` when ``DEV_ENV`` equals
        ``True`` and by ``'test'`` when ``INSPETOR_ENV`` equals ``True``
        (the latter wins when both are set).

        Returns:
            The same ``config`` object, or ``None`` when ``TRACKER_NAME`` or
            ``APP_ID`` is ``None``.
        """
        if config['TRACKER_NAME'] is None or config['APP_ID'] is None:
            return None

        for key in self._DEFAULTED_KEYS:
            config[key] = self.defaultConfig[key]

        # `== True` rather than plain truthiness is kept on purpose: values
        # such as the string "false" must not switch the collector host.
        if "DEV_ENV" in config and config["DEV_ENV"] == True:  # noqa: E712
            config["COLLECTOR_HOST"] = self.defaultConfig["COLLECTOR_HOST_DEV"]
        if "INSPETOR_ENV" in config and config["INSPETOR_ENV"] == True:  # noqa: E712
            config["COLLECTOR_HOST"] = 'test'
        return config

    def track_describing_event(self, schema, data, context, action):
        """Track a self-describing event with one ``action`` context.

        Args:
            schema: Schema URI of the event.
            data: Event payload.
            context: Schema URI of the attached context.
            action: Value stored under ``'action'`` in the context payload.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(schema, data),
            [
                SelfDescribingJson(context, {'action': action}),
            ],
            self.get_normalized_timestamp())

    def track_non_describing_event(self, schema):
        """Track an event that records ``schema`` as the intended schema id.

        The event uses the schema configured under
        ``INGRESSE_SERIALIZATION_ERROR`` in the default config and carries no
        contexts.
        """
        self.tracker.track_self_describing_event(
            SelfDescribingJson(
                self.defaultConfig["INGRESSE_SERIALIZATION_ERROR"],
                {'intendedSchemaId': schema}),
            [],
            self.get_normalized_timestamp())

    def flush(self):
        """Flush the tracker."""
        self.tracker.flush()

    def get_normalized_timestamp(self):
        """Return the current time in milliseconds, truncated to whole seconds.

        NOTE(review): the seconds are truncated *before* multiplying, so the
        result is always a multiple of 1000.  Confirm whether
        ``int(time.time() * 1000)`` was intended before changing it.
        """
        return int(time.time()) * 1000

    def get_normalized_data(self, data):
        """Return ``data`` with non-ASCII characters replaced or dropped.

        The text is NFKD-normalized so accented letters decompose into a base
        letter plus combining marks; every character that is still not ASCII
        is then dropped.
        """
        decomposed = unicodedata.normalize('NFKD', data)
        return decomposed.encode('ascii', 'ignore').decode('utf-8')