def run(self):
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    while True:
        producer.send('my-topic', b"test")
        producer.send('my-topic', b"\xc2Hola, mundo!")
        time.sleep(1)
def create_restaurant(request):
    content = {"success": False}
    if request.method != "POST":
        content["result"] = "GET Request Received. Expected POST."
    else:
        request_url = settings.MODELS_LAYER_URL + "api/restaurants/create/"
        response = requests.post(request_url, data=request.POST)
        r = json.loads(response.content.decode('utf-8'))
        if r['success']:
            producer = KafkaProducer(bootstrap_servers='kafka:9092')
            # request.POST is an immutable QueryDict; copy it with .dict() before mutating
            new_listing = request.POST.dict()
            new_listing['restaurant_id'] = r['user']['id']
            producer.send('new-restaurant-topic', json.dumps(new_listing).encode('utf-8'))

            url = settings.MODELS_LAYER_URL + "api/auth/authenticator/create/"
            data = json.dumps(r['user'])
            r = requests.post(url, data={'user': data,
                                         'username': request.POST['username'],
                                         'password': request.POST['password']}).json()
            if r['success']:
                content['success'] = True
                content['auth'] = r['auth']
            else:
                content['result'] = 'Models layer failed: ' + r['result']
        else:
            content['result'] = "Models layer failed: " + r['result']
    return JsonResponse(content)
class Producer(object):
    def __init__(self):
        self.producer = KafkaProducer(
            bootstrap_servers=["50.112.40.243", "52.25.13.29",
                               "50.112.22.187", "52.24.80.162"],
            value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            acks=0,
            linger_ms=500)

    def jsonITEM(self, itemList):
        # build the JSON with json.dumps instead of manual string concatenation,
        # which would produce invalid JSON if a field contained quotes
        return json.dumps({
            "location": itemList[0],
            "item": str(itemList[1]),
            "time": itemList[2],
            "Producer": itemList[3],
        })

    def produce_msgs(self):
        msg_cnt = 0
        while True:
            lItem = getItemScanned()
            message_info = {"location": lItem[0],
                            "item": lItem[1],
                            "time": lItem[2],
                            "storeid": random.randint(0, NUM_USERS - 1)}
            self.producer.send('price', message_info)
            print(message_info)
            time.sleep(.05)
            msg_cnt += 1
def main():
    """
    A generic Kafka producer for use as a Cylc event handler.

    USAGE:
        cylc_kafka_producer.py <HOST:PORT> <TOPIC> key1=val1 key2=val2 ...
    serializes {key1: val1, key2: val2, ...} to TOPIC at Kafka on HOST:PORT.

    This is generic in that a JSON message schema is defined by the received
    command line keyword arguments. To enforce compliance to a particular
    schema, copy and modify as needed.

    Can be partnered with the generic cylc_kafka_consumer external trigger
    function, for triggering downstream suites.
    """
    if 'help' in sys.argv[1]:
        print(cleandoc(main.__doc__))
        sys.exit(0)

    # TODO exception handling for bad inputs etc.
    kafka_server = sys.argv[1]
    kafka_topic = sys.argv[2]
    # Construct a message dict from kwargs.
    dmsg = dict([k.split('=') for k in sys.argv[3:]])

    producer = KafkaProducer(
        bootstrap_servers=kafka_server,
        value_serializer=lambda msg: json.dumps(msg).encode('utf-8'))

    producer.send(kafka_topic, dmsg)
    producer.flush()
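# A hypothetical invocation of the script above (broker address, topic, and
# keys are examples only, not from the original):
#
#   python cylc_kafka_producer.py localhost:9092 suite-events suite=foo event=succeeded
#
# would serialize {"suite": "foo", "event": "succeeded"} to the suite-events topic.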
class KafkaHandler(logging.Handler):
    def __init__(self, host, *args, **kwargs):
        super(KafkaHandler, self).__init__(*args, **kwargs)
        self.kafka_producer = KafkaProducer(bootstrap_servers=host)

    def emit(self, record):
        message = self.format(record)
        event_dict = {
            'klog_level': record.levelname.upper(),
            'klog_time': record.created,
            'klog_message': message,
        }
        for attribute, value in six.iteritems(vars(record)):
            event_dict[attribute] = value
        # default=str keeps json.dumps from failing on non-serializable record attributes
        json_dump = json.dumps(event_dict, default=str)
        # topic names must be str; only the payload is encoded to bytes
        name = str(record.name)
        self.kafka_producer.send(name + '.json', json_dump.encode('utf-8'))
        self.kafka_producer.send('all.json', json_dump.encode('utf-8'))
        self.kafka_producer.send(name + '.txt', message.encode('utf-8'))
        self.kafka_producer.send('all.txt', message.encode('utf-8'))
        self.flush()

    def flush(self):
        self.kafka_producer.flush()
def create_listing_exp_api(request):
    auth = request.POST.get('auth')
    user_id = requests.get('http://modelsul:8000/api/v1/get_userid_auth/' + auth).json()
    user_id1 = user_id['resp']['user_id']
    title = request.POST.get('title', 'default')
    category = request.POST.get('category', 'default')
    subcategory = request.POST.get('subcategory', 'default')
    summary = request.POST.get('summary', 'default')
    price = request.POST.get('price', 'default')
    # Needs to verify that the person is authorized
    auth = request.POST.get('auth', 'default')
    post = requests.post('http://modelsul:8000/api/v1/create_post/',
                         data={"user_id": user_id1, "title": title,
                               "category": category, "subcategory": subcategory,
                               "summary": summary, "price": price})
    if not post.json()['ok']:
        return JsonResponse({})
    else:
        producer = KafkaProducer(bootstrap_servers='kafka:9092')
        some_new_listing = {'title': title,
                            'description': summary,
                            'id': post.json()['resp']['id']}
        producer.send('new-listings-topic', json.dumps(some_new_listing).encode('utf-8'))
        return JsonResponse(post.json())
def create_reservation(request):
    content = {'success': False}
    if request.method != 'POST':
        content['result'] = "Invalid request method. Expected POST."
    else:
        # AUTHENTICATE USER (get customer ID)
        authenticator = request.POST['authenticator']
        if not authenticator:
            # a Django view must return an HttpResponse, not a bare string
            content['result'] = "No authenticator provided. Anonymous user."
            return JsonResponse(content)
        r = get_user(authenticator)
        if r['success']:
            # call function to put a new listing into model
            url = settings.MODELS_LAYER_URL + "api/reservations/create/"
            params = json.loads(request.POST['reservation_details'])
            params['customer'] = r['user']['id']
            content = requests.post(url, params).json()
            if content['success']:
                # add listing into kafka
                reservation_info = content['reservation']
                producer = KafkaProducer(bootstrap_servers='kafka:9092')
                producer.send('new-listings-topic',
                              json.dumps(reservation_info).encode('utf-8'))
            else:
                # failed to add it to the database
                return JsonResponse(content)
        else:
            content['result'] = "User not authenticated."
    print(content)
    return JsonResponse(content)
class Results:
    def GET(self):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.goodtopic = 'goodtopic'
        self.badtopic = 'badtopic'
        self.spamtopic = 'spamtopic'
        self.stop = set(nltk.corpus.stopwords.words('english'))
        self.stop.update(['http', 'https', 'rt'])
        self.db = pymongo.MongoClient()
        web.header('Access-Control-Allow-Origin', '*')
        web.header('Access-Control-Allow-Credentials', 'true')
        web.header('Content-Type', 'application/json')
        user_data = web.input(id={})
        data = json.loads(str(user_data.id))
        for line in data:
            texto = ''
            tokens = nltk.word_tokenize(data[line]['tweet'])
            for w in tokens:
                # keep tokens as str while filtering; encode once when sending
                w = w.lower()
                if w.isalpha() and w not in self.stop:
                    texto = texto + ' ' + w
            payload = texto.encode('utf-8')
            if data[line]['answer'] == 'Good':
                self.db.LEARNING.goodlearning.update_one(
                    {"type": 'contagem'}, {"$inc": {'count': 1}}, upsert=True)
                self.producer.send(self.goodtopic, payload)
            if data[line]['answer'] == 'Bad':
                self.db.LEARNING.badlearning.update_one(
                    {"type": 'contagem'}, {"$inc": {'count': 1}}, upsert=True)
                self.producer.send(self.badtopic, payload)
        return 'algo'
def run():
    parser = get_args_parser()
    try:
        parse_result = parser.parse_args()
        topic_name = parse_result.topic
        num_records = parse_result.num_records
        record_size = parse_result.record_size
        producer_props = parse_result.producer_config
        props = {}
        for prop in producer_props:
            k, v = prop.split('=')
            try:
                v = int(v)
            except ValueError:
                pass
            props[k] = v

        producer = KafkaProducer(**props)
        record = bytes(bytearray(record_size))
        stats = Stats(num_records, 5000)
        for i in range(num_records):
            send_start_ms = get_time_millis()
            future = producer.send(topic=topic_name, value=record)
            future.add_callback(stats.next_completion(
                send_start_ms, record_size, stats))
        producer.close()
        stats.print_total()
    except Exception:
        traceback.print_exception(*sys.exc_info())
        sys.exit(1)
def stream_generator():
    rediscon = redis.StrictRedis(host='ec2-52-40-47-83.us-west-2.compute.amazonaws.com',
                                 port=6379, db=0, password='')
    producer = KafkaProducer(bootstrap_servers=["52.41.140.111:9092",
                                                "52.41.90.5:9092",
                                                "52.41.120.152:9092"])
    # redis returns bytes; convert before comparing to an int
    res = int(rediscon.get('active'))
    tp = random.randrange(900000, 1800001)
    st = int(round(time.time() * 1000))
    diff = 0
    while True:
        if res == 1 and diff == 0:
            tp = random.randrange(900000, 1800001)
            st = int(round(time.time() * 1000))
        if res == 1:
            diff = int(round(time.time() * 1000)) - st
            st1 = 0  # steps
            st2 = 0
            u1 = 0   # user_id
            u2 = 1
            now = datetime.datetime.now() - datetime.timedelta(hours=7)
            hr1 = random.randrange(60, 200)  # heart_rate
            hr2 = random.randrange(60, 200)
            if diff % 1000 == 0:
                st1 = random.randrange(0, 3)
                st2 = random.randrange(0, 3)
                print('-------------------' + str(diff) + '-----------------------')
                data1 = str(now) + "," + str(u1) + "," + str(st1) + "," + str(hr1)
                data2 = str(now) + "," + str(u2) + "," + str(st2) + "," + str(hr2)
                # no serializer configured, so encode the payloads to bytes
                producer.send('stream_test', data1.encode('utf-8'))
                producer.send('stream_test', data2.encode('utf-8'))
                print('*')
            if diff == tp:
                rediscon.set('active', 0)
                res = int(rediscon.get('active'))
                diff = 0
        res = int(rediscon.get('active'))
class SensorHatLogger:
    """
    Logs the hostname, time (unixtime), temperature, humidity, and pressure to
    Kafka in JSON format. The data is generated by a Raspberry Pi with a Sense Hat:
    https://www.raspberrypi.org/products/sense-hat/

    This captures a read approx. every 10 seconds.

    TODO: https://github.com/initialstate/wunderground-sensehat/wiki/Part-3.-Sense-HAT-Temperature-Correction
    """

    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers='hdp01.woolford.io:6667')
        self.sense = SenseHat()
        self.sensor_record = dict()

    def read_values_from_sensor(self):
        self.sensor_record['host'] = socket.gethostname()
        self.sensor_record['timestamp'] = int(time.time())
        self.sensor_record['temperature'] = self.sense.get_temperature()
        self.sensor_record['humidity'] = self.sense.get_humidity()
        self.sensor_record['pressure'] = self.sense.get_pressure()

    def send_record_to_kafka(self):
        sensor_record_json = json.dumps(self.sensor_record)
        # encode to bytes: no value_serializer is configured on the producer
        self.producer.send("temperature_humidity_json",
                           sensor_record_json.encode('utf-8'))

    def run(self):
        self.read_values_from_sensor()
        self.send_record_to_kafka()
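# run() performs a single read/send, while the docstring mentions a read roughly
# every 10 seconds; presumably an outer loop or scheduler provides the cadence.
# A minimal sketch of such a driver (the interval is an assumption):
if __name__ == '__main__':
    logger = SensorHatLogger()
    while True:
        logger.run()
        time.sleep(10)  # assumed interval, per the "approx. every 10 seconds" note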
class SimpleKafkaProducer:
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers=kafka_bootstrap_servers)

    def send_message(self, topic, msg, key=None):
        # print("# sending msg: ", key, msg)
        self.producer.send(topic, value=msg, key=key)
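# A hypothetical usage sketch (topic name and broker setting are assumptions).
# Messages sharing a key hash to the same partition, which preserves their
# relative order; without key/value serializers, both must be bytes.
# kafka_bootstrap_servers = 'localhost:9092'   # assumed module-level setting
sp = SimpleKafkaProducer()
sp.send_message('user-events', b'logged_in', key=b'user-42')
sp.send_message('user-events', b'logged_out', key=b'user-42')  # same key -> same partition
sp.producer.flush()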
class KafkaProducerCountBolt(BasicBolt):
    numWindowChunks = 5
    emitFrequencyInSeconds = 10
    windowLengthInSeconds = numWindowChunks * emitFrequencyInSeconds

    def __init__(self):
        super(KafkaProducerCountBolt, self).__init__(script=__file__)
        self.counter = SlidingWindowCounter(5)

    def initialize(self, conf, context):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.topic = 'spamwordcounttopic'

    @classmethod
    def declareOutputFields(cls):
        return ['word', 'count']

    def process(self, tup):
        if tup.is_tick_tuple():
            self.emitCurrentWindowCounts()
        else:
            self.counter.incrementCount(tup.values[0])

    def emitCurrentWindowCounts(self):
        counts = self.counter.getCountsThenAdvanceWindow()
        for k, v in counts.items():
            # build the payload as str, then encode once for the producer
            word2 = '{} {}'.format(k, v).encode('utf-8')
            self.producer.send(self.topic, word2)
            storm.emit([k, v])

    def getComponentConfiguration(self):
        return {"topology.tick.tuple.freq.secs": 300}
def run(self):
    producer = KafkaProducer(bootstrap_servers='172.16.218.128:10021')
    while True:
        producer.send("test", b"msg")  # payload must be bytes: no serializer configured
        # producer.send("test", b"abc")
        time.sleep(1)
def run(self, run_time):
    """ Send checkresults to Kafka Topic """
    logging.debug("Establishing passive handler: Kafka")
    super(Handler, self).run()
    itemlist = []
    for check in self.checks:
        if check.needs_to_run():
            item = self.do_check(check)
            item.check_time = run_time
            check.set_next_run(run_time)
            item.hostname = self.get_kafka_hostname(item)
            itemlist.append(item)
    if len(itemlist) > 0:
        try:
            logging.info('Connect to Kafka Server')
            producer = KafkaProducer(
                bootstrap_servers=['{}'.format(self.str_kafakhosts)],
                client_id=self.str_client_id)
        except KafkaError:
            logging.warning(
                'Problem to connect Kafka Server: {} with Topic: {} and Clientname {}'.format(
                    self.str_kafakhosts, self.str_topic, self.str_client_id))
        for item in itemlist:
            # key and value must be bytes; format_for_kafka is an instance
            # method, so self is passed implicitly
            producer.send(self.str_topic,
                          key=str(item.hostname).encode('utf-8'),
                          value=json.dumps(self.format_for_kafka(item)).encode('utf-8'))
        producer.flush()
def run(self):
    producer = KafkaProducer(**KAFKA_PRODUCER_CONFIG)
    while True:
        producer.send('python-madrid', b"FOO")
        producer.send('python-madrid', b"BAR")
        producer.send('python-madrid', b"BAZ")
        time.sleep(5)
class TwitterListener(tweepy.StreamListener):
    def __init__(self, stop, user):
        self.producer = KafkaProducer(bootstrap_servers='localhost:9092')
        self.instanttopic = 'instanttopic'
        self.user = str(user)
        self.numstop = int(stop)

    def on_data(self, data):
        tweet = json.loads(data)
        if 'text' in tweet:
            # keep the tweet as str; encode once when handing it to the producer
            texto = self.user + '-' + tweet['text']
            self.numstop -= 1
            self.producer.send(self.instanttopic, texto.encode('utf-8'))
            saveTweet('pos', tweet, self.user)
            saveLocation('pos', tweet, self.user)
            vs = vaderSentiment(texto)
            contagemneg = vs['neg']
            contagempos = vs['pos']
            contagemspam = vs['neu']
        if self.numstop == 0:
            return False
        return True
def kafka_producer_call():
    kafka_producer = KafkaProducer(bootstrap_servers=KAFKA_SERVER)
    for i in range(NB_MESSAGES):
        word = b"yay"  # payload must be bytes: no serializer configured
        kafka_producer.send(KAFKA_TOPIC, word)
    kafka_producer.flush()
    return 1
class KafkaMessageSender(object):
    def __init__(self, config_source):
        # e.g. config_source = "config/producer_config.yml"
        self.config_source = config_source
        # load configuration parameters
        config = yaml_loader(self.config_source)
        # initialize parameters
        self.topics = config['topics']
        self.port = config['port']
        self.current_topic = self.topics[0]
        self.producer = KafkaProducer(bootstrap_servers=[self.port])

    def send_message(self, messages):
        for message in messages:
            print(message.strip('[]'))
            # encode to bytes: no value_serializer is configured
            self.producer.send(self.current_topic,
                               value=message.strip('[]').encode('utf-8'))
        # block until all async messages are sent
        self.producer.flush()
def stream_events(l_clusts, job, debug=False):
    print("Converting to QCR format")
    kafka_url = job['kafka_url']
    kafka_topic = job['kafka_topic']
    try:
        kds = []
        for clust in l_clusts:
            kds.extend(to_qcr_format(clust, job, debug=debug))
    except Exception as exc:
        print(exc)
        traceback.print_exc()
    if kafka_url == 'print':
        print("Printing events to console instead of sending them to kafka.")
        for doc in kds:
            for k, v in doc.items():
                print(k, v)
        return
    # wait until the very last second to import these kafka packages
    from kafka import KafkaProducer
    from kafka.errors import KafkaError
    producer = KafkaProducer(bootstrap_servers=kafka_url,
                             value_serializer=lambda v: json.dumps(v).encode('utf-8'))
    print("Streaming Events")
    for doc in kds:
        try:
            state = producer.send(kafka_topic, doc)
            record_metadata = state.get(timeout=10)
            print(record_metadata.topic)
            print(record_metadata.partition)
            print(record_metadata.offset)
        except KafkaError:
            traceback.print_exc()
def produce_to_bruce(schema, args, config):
    topic = config['kafka']['topic']
    if args.partition_count:
        partition_count = args.partition_count
    else:
        print('fetch partition info for topic ' + topic)
        producer = KafkaProducer(bootstrap_servers=config['kafka']['brokers'])
        partition_count = 1 + max(producer.partitions_for(topic))
        producer.close()

    socket = bruce.open_bruce_socket()

    # batching socket send
    buff = []

    def flush_buff():
        for msg in buff:
            socket.sendto(msg, '/var/run/bruce/bruce.socket')
        del buff[:]

    def f_produce(topic, partition, key, value):
        # flush first when the buffer is full, so the current message is not dropped
        # (the original appended only when there was room and silently lost the
        # message that triggered the flush)
        if len(buff) >= 1000:
            flush_buff()
        buff.append(bruce.create_msg(partition, topic, bytes(key), bytes(value)))

    try:
        bootstrap(f_produce, partition_count, schema, args.database, args.table, config)
        flush_buff()
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        socket.close()
def run(self):
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    while True:
        producer.send('my-topic', b"test for hw08-solution02")
        producer.send('my-topic', b"you are good, done!")
        time.sleep(1)
class KafkaPythonClient(PythonClient):
    def __init__(self, topic=topic_name, kafkaHost=kafka_host, zookeeperHost=zookeeper_host):
        self.config["topic"] = topic
        self.config["kafkaHost"] = kafkaHost
        self.config["zookeeperHost"] = zookeeperHost
        super(KafkaPythonClient, self).__init__()

    def createProducer(self, kafkaSync):
        self.config["kafkaSync"] = kafkaSync
        self.producer = KafkaProducer(bootstrap_servers=self.config["kafkaHost"])

    def createConsumer(self):
        self.consumer = KafkaConsumer(bootstrap_servers=self.config["kafkaHost"],
                                      enable_auto_commit=True,
                                      auto_offset_reset='latest',
                                      consumer_timeout_ms=1000)
        self.consumer.subscribe([self.config["topic"]])

    def produce(self, num_msg=20000):
        self.msgCount = num_msg
        for x in range(self.msgCount):
            self.prtProgress(x, 10000)
            result = self.producer.send(self.config["topic"], self.msg)
            if self.config["kafkaSync"] == True:
                # block for "synchronous" mode:
                try:
                    result_metadata = result.get(timeout=10)
                except KafkaError:
                    print("*** KAFKA ERROR ***")
        if x >= 10000:
            sys.stdout.write('\n')

    def consume(self, num_msg):
        count = 0
        for message in self.consumer:
            count += 1
            self.prtProgress(count, 10000)
        sys.stdout.write('\n')
        if num_msg > 0:
            if count != num_msg:
                print("ERROR: KafkaPythonClient.consume: # of messages not as expected, "
                      "read: {}, expected: {}".format(count, num_msg))
        return count

    def startProducer(self):
        pass

    def stopProducer(self):
        self.beforeFlushTimer(self.timeDict['producer'])
        if self.config["kafkaSync"] == False:
            self.producer.flush()

    def stopConsumer(self):
        pass

    def initCount(self):
        self.consume(0)

    def finalize(self):
        pass
class sinktask(object):
    def __init__(self, kafka_URI, topic_str):
        self.producer = KafkaProducer(bootstrap_servers=kafka_URI)
        self.topic = topic_str

    def execute(self, data):
        # encode str payloads explicitly; bytes(data) without an encoding
        # raises TypeError for str in Python 3
        payload = data.encode('utf-8') if isinstance(data, str) else bytes(data)
        self.producer.send(self.topic, payload)
def run(self):
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    self.sent = 0
    while not producer_stop.is_set():
        producer.send('my-topic', self.big_msg)
        self.sent += 1
    producer.flush()
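# run() above references a module-level producer_stop event and a big_msg
# attribute. A minimal sketch of how such a thread might be wired up (the names
# mirror the snippet; the class, payload, and timings are assumptions):
import threading
import time

producer_stop = threading.Event()  # assumed module-level flag polled by run()

class BigMessageProducer(threading.Thread):  # hypothetical host class for run()
    big_msg = b'x' * 10000  # assumed payload
    run = run               # the run() defined above becomes the thread body

t = BigMessageProducer()
t.start()
time.sleep(5)            # let it produce for a few seconds
producer_stop.set()      # ask the loop to exit; run() then flushes
t.join()
print('sent', t.sent, 'messages')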
def submit_kafka_job(job, type):
    producer = KafkaProducer(bootstrap_servers='kafka:9092')
    if type == CREATE:
        kafka_queue = 'create-ride-topic'
    elif type == UPDATE:
        kafka_queue = 'update-ride-topic'
    else:
        kafka_queue = 'delete-ride-topic'
    producer.send(kafka_queue, json.dumps(job).encode('utf-8'))
def run(self): print "producer" producer = KafkaProducer(bootstrap_servers='kafka:9092') print "producer... ok" while True: producer.send('my-topic', b"test") producer.send('my-topic', b"\xc2Hola, mundo!") time.sleep(1)
def getstarted():
    name = request.form['userName']
    print(request.form['temperature'])
    # named reading rather than object to avoid shadowing the builtin
    reading = {"sensorID": str(name),
               "time": datetime.datetime.now().strftime('%a %b %d %Y %H:%M:%S'),
               "temperature": str(request.form['temperature']),
               "flag": "false"}
    print(reading)
    producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('ascii'))
    producer.send('test', reading)
    print("Generated...")
    return json.dumps(reading)
class KafkaSender():
    def __init__(self):
        self.client = KafkaClient(hosts)
        self.producer = KafkaProducer(bootstrap_servers=hosts)
        self.client.ensure_topic_exists(topic)

    def send_messages(self, msg):
        self.producer.send(topic, msg)
class KafkaBeerPipeline(object):
    def __init__(self):
        self.producer = KafkaProducer(bootstrap_servers=['localhost:9092'])

    def process_item(self, item, spider):
        client = SchemaRegistryClient(url='http://localhost:8081')
        schema_id, avro_schema, schema_version = client.get_latest_schema('beerscraper')
        serializer = MessageSerializer(client)
        encoded = serializer.encode_record_with_schema('beer', avro_schema,
                                                       item.__dict__['_values'])
        self.producer.send('beer', encoded)
# imports added so the script runs standalone
import sys
from time import time, sleep

import numpy as np
from kafka import KafkaProducer

DEVICE_PROFILES = {
    "seoul": {'temp': (30.3, 7.7), 'humd': (77.4, 18.7), 'pres': (1019.9, 9.5)},
    "home": {'temp': (24.5, 3.3), 'humd': (33.0, 13.9), 'pres': (1000.0, 11.3)},
}

if len(sys.argv) != 2 or sys.argv[1] not in DEVICE_PROFILES.keys():
    print("please provide a valid device name:")
    for key in DEVICE_PROFILES.keys():
        print(f"  {key}")
    print(f"\nformat: {sys.argv[0]} DEVICE_NAME")
    sys.exit(1)

profile_name = sys.argv[1]
profile = DEVICE_PROFILES[profile_name]

producer = KafkaProducer(bootstrap_servers='kafka-single-node:9092')

while True:
    temp = np.random.normal(profile['temp'][0], profile['temp'][1])
    humd = max(0, min(np.random.normal(profile['humd'][0], profile['humd'][1]), 100))
    pres = np.random.normal(profile['pres'][0], profile['pres'][1])
    msg = f'{time()},{profile_name},{temp},{humd},{pres}'
    producer.send('iot', bytes(msg, encoding='utf8'))
    print('sending data to kafka')
    print(msg)
    sleep(.5)
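# A minimal consumer sketch for checking what the loop above publishes; the
# broker address and 'iot' topic mirror the snippet, the rest is assumed.
from kafka import KafkaConsumer

consumer = KafkaConsumer('iot',
                         bootstrap_servers='kafka-single-node:9092',
                         auto_offset_reset='earliest')
for record in consumer:
    ts, device, temp, humd, pres = record.value.decode('utf8').split(',')
    print(device, temp, humd, pres)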
# -*- coding: utf-8 -*-
import json
from kafka import KafkaProducer
from kafka import KafkaConsumer

producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8'),
                         bootstrap_servers='49.4.90.247:6667')

msg_dict = {
    "sleep_time": 10,
    "db_config": {
        "database": "test_1",
        "host": "xxxx",
        "user": "******",
        "password": "******"
    },
    "table": "msg",
    "msg": "Hello World"
}
# pass the dict itself: the value_serializer already runs json.dumps, so
# dumping here first would double-encode the payload
future = producer.send('test_rhj', msg_dict)
record_metadata = future.get(timeout=10)
print(record_metadata.topic)
print(record_metadata.partition)
print(record_metadata.offset)
producer.close()
class KafkaPC:
    def __init__(self, config_path, config_section):
        super(KafkaPC, self).__init__()
        self.in_topic = None
        self.out_topic = None
        self.in_schema = None
        self.out_schema = None
        self.read_config(config_path, config_section)
        self.read_topics()
        self.create_consumer()
        self.create_producer()

    def read_config(self, config_path, config_section):
        self.config = {}
        if config_path is not None and config_section is not None:
            config_section = config_section.replace(" ", "").split(",")
        else:
            raise ValueError("Configuration requires config_path and config_section")
        try:
            with open(config_path, "r") as ymlfile:
                config = yaml.load(ymlfile, Loader=yaml.FullLoader)
                for section in config_section:
                    for key, value in config[section].items():
                        self.config[key] = value
        except Exception as e:
            print(f"Failed to read the config: {repr(e)}")
            sys.exit()

    def read_topics(self):
        if self.config.get("IN_TOPIC") and self.config.get("IN_GROUP"):
            self.in_topic = list(self.config["IN_TOPIC"].keys())
            self.in_schema = {}
            for topic, schema in self.config["IN_TOPIC"].items():
                self.in_schema[topic] = self.read_avro_schema(schema)
        if self.config.get("OUT_TOPIC"):
            self.out_topic = list(self.config["OUT_TOPIC"].keys())
            self.out_schema = {}
            for topic, schema in self.config["OUT_TOPIC"].items():
                self.out_schema[topic] = self.read_avro_schema(schema)

    def create_consumer(self):
        if self.config.get("IN_TOPIC") and self.config.get("IN_GROUP"):
            self.consumer = KafkaConsumer(
                group_id=self.config["IN_GROUP"],
                bootstrap_servers=[self.config["KAFKA_BROKER_URL"]],
                auto_offset_reset='earliest')
            self.consumer.subscribe(self.in_topic)

    def create_producer(self):
        if self.config.get("OUT_TOPIC"):
            self.producer = KafkaProducer(
                linger_ms=50,
                bootstrap_servers=[self.config["KAFKA_BROKER_URL"]])

    def read_avro_schema(self, schema):
        return avro.schema.Parse(open(schema).read())

    def decode_avro_msg(self, msg):
        try:
            bytes_reader = io.BytesIO(msg.value)
            decoder = avro.io.BinaryDecoder(bytes_reader)
            reader = avro.io.DatumReader(self.in_schema[msg.topic])
            return reader.read(decoder)
        except Exception as e:
            print(f"Error decoding avro data: {repr(e)}")
            sys.exit()

    def __encode(self, data, schema):
        raw_bytes = None
        try:
            writer = DatumWriter(schema)
            bytes_writer = io.BytesIO()
            encoder = BinaryEncoder(bytes_writer)
            writer.write(data, encoder)
            raw_bytes = bytes_writer.getvalue()
        except Exception as e:
            print(f"Error encoding data: {repr(e)}")
        return raw_bytes

    def send_msg(self, data, key=0, topic=None):
        # if no topic is provided, the first topic in the list is used as default
        out_topic = self.out_topic[0] if topic is None else topic
        schema = self.out_schema[out_topic]
        # encode the data with the specified Avro out_schema
        raw_bytes = self.__encode(data, schema)
        # publish the message if encoding was successful
        if raw_bytes is not None:
            try:
                self.producer.send(out_topic, raw_bytes, partition=key)
            except Exception as e:
                print(f"Error sending data to Kafka: {repr(e)}")
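# For illustration, a hypothetical config file matching what read_config and
# read_topics expect (section name, broker URL, topics, and schema paths are
# all invented):
#
#   PC_SETTINGS:
#     KAFKA_BROKER_URL: localhost:9092
#     IN_GROUP: demo-consumer-group
#     IN_TOPIC:
#       input-topic: schemas/input.avsc      # topic -> Avro schema file
#     OUT_TOPIC:
#       output-topic: schemas/output.avsc
#
# which would then be loaded with:
#
#   pc = KafkaPC("config.yml", "PC_SETTINGS")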
import requests
import threading
import re
import json
import time
from kafka import KafkaProducer
from bs4 import BeautifulSoup

producer = KafkaProducer(bootstrap_servers='node-3:9093',
                         value_serializer=lambda v: json.dumps(v).encode('utf-8'))

PAGE = 1
HEADERS = {
    'origin': 'https://careers.ibm.com',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
}
THREADS = []
cont = 0

def get_info(url, country):
    global cont
    try:
        soup = BeautifulSoup(requests.get(url, headers=HEADERS).text, "html.parser")
        cont += 1
        print(cont)
        job = {}
        job['url'] = url
        # Title
app = Flask(__name__)
client = MongoClient('172.23.0.5', 27017)

from pyelasticsearch import ElasticSearch
elastic = ElasticSearch(config.ELASTIC_URL)

import json

# Date/time stuff
import iso8601
import datetime

# Setup Kafka
from kafka import KafkaProducer
producer = KafkaProducer(bootstrap_servers=['172.23.0.2:9092'], api_version=(0, 10))
PREDICTION_TOPIC = 'flight_delay_classification_request'

import uuid

# Chapter 5 controller: Fetch a flight and display it
@app.route("/on_time_performance")
def on_time_performance():
    carrier = request.args.get('Carrier')
    flight_date = request.args.get('FlightDate')
    flight_num = request.args.get('FlightNum')
    flight = client.agile_data_science.on_time_performance.find_one({
        'Carrier':
TWITTER_APP_KEY = ''
TWITTER_APP_KEY_SECRET = ''

# Authenticating Credentials
twitterauth = Twython(app_key=TWITTER_APP_KEY,
                      app_secret=TWITTER_APP_KEY_SECRET,
                      oauth_token=TWITTER_ACCESS_TOKEN,
                      oauth_token_secret=TWITTER_ACCESS_TOKEN_SECRET)

# Initializing Kafka
KAFKA_HOST = 'localhost:9092'
TOPIC = 'twitter'
producer = KafkaProducer(bootstrap_servers=[KAFKA_HOST])

def get_tweets(keyword):
    search = twitterauth.search(q=keyword, count=100)
    tweets = search['statuses']
    for tweet in tweets:
        if tweet['geo'] != None:
            print(tweet['user']['lang'])
            if tweet['user']['lang'] == 'en':
                text = tweet['text'].lower().encode('ascii', 'ignore').decode('ascii')
class GMostRequestedPriority_Job:
    def __init__(self, request_data_dic):
        self.job_name = mostDefine.SELF_POLICY_NAME
        self.requestDataDic = request_data_dic
        self.requestID = request_data_dic['requestID']
        self.fileID = request_data_dic['fileID']
        self.failCnt = request_data_dic['failCnt']
        self.env = request_data_dic['env']
        self.targetClusters = self.env['targetClusters']
        self.sharedClusters = self.get_shared_clusters()
        self.producer = KafkaProducer(
            acks=0,
            compression_type='gzip',
            bootstrap_servers=[mostDefine.KAFKA_SERVER_URL],
            value_serializer=lambda x: dumps(x).encode('utf-8'))

    def get_shared_clusters(self):
        for item in self.targetClusters:
            # use isinstance: comparing type(item).__name__ (a str) to the
            # list class is always False
            if isinstance(item, list):
                if len(item) > 1:
                    return item
                else:
                    return None
            else:
                print()

    # apply low-latency yaml with
    def check_res_fail(self, res):
        if res is None:
            return True
        if 'hcode' not in res:
            return True
        if 'lcode' not in res:
            return True
        if 'msg' not in res:
            return True
        if 'result' not in res['msg']:
            return True
        return False

    def request_clusters_available_resource_from_clusterAgent(self, clusters):
        try:
            temp_msg = {
                'source': {'type': 'none'},
                'target': {'type': 'cluster', 'object': clusters},
                'hcode': 300,
                'lcode': 1,
                'msg': {'requestID': self.requestID}
            }
            self.producer.send(mostDefine.GLOBAL_SCHEDULER_GLOBAL_TOPIC_NAME,
                               value=temp_msg)
            self.producer.flush()
        except:
            return 'process_fail'
        return 'process_success'

    def wait_request_clusters_available_resource_from_clusterAgent(self, clusters):
        clusters_data_list = []
        re_count = len(clusters)
        for i in range(re_count):
            res = self.wait_consumer()
            if res is None:
                print('res is None')
                return 'process_fail', clusters_data_list
            is_process_fail = self.check_res_fail(res)
            hcode = res['hcode']
            lcode = res['lcode']
            result = res['msg']['result']
            # result example:
            # {'cpu': 90, 'memory': 87, 'memory_szie_mbyte': 12000, 'score': 177}
            if is_process_fail:
                print('Fail Job:', res)
                return 'process_fail', clusters_data_list
            else:
                if hcode == 300 and lcode == 2:
                    clusters_data_list.append(result)
                else:
                    return 'process_fail', clusters_data_list
        print('clusters_data_list', clusters_data_list)
        sorted_clusters_data_list = sorted(clusters_data_list, key=itemgetter('score'))
        return 'process_success', sorted_clusters_data_list

    def apply_yaml_to_ClusterAgent(self, cluster):
        print('apply_yaml_to_ClusterAgent:', cluster)
        try:
            temp_msg = {
                'source': {'type': 'none'},
                'target': {'type': 'cluster', 'object': cluster},
                'hcode': 310,
                'lcode': 1,
                'msg': {'requestID': self.requestID,
                        'fileID': self.fileID,
                        'requestData': self.requestDataDic}
            }
            self.producer.send(mostDefine.GLOBAL_SCHEDULER_GLOBAL_TOPIC_NAME,
                               value=temp_msg)
            self.producer.flush()
        except:
            return 'process_fail'
        return 'process_success'

    def wait_apply_yaml_to_ClusterAgent(self):
        res = self.wait_consumer()
        if res is None:
            print('res is None')
            return 'process_fail'
        is_process_fail = self.check_res_fail(res)
        hcode = res['hcode']
        lcode = res['lcode']
        result = res['msg']['result']
        print('hcode, lcode, result:', hcode, lcode, result)
        if is_process_fail:
            print('Fail Job:', res)
            return 'process_fail'
        else:
            if hcode == 310 and lcode == 2:
                if result == 'success':
                    return 'apply_success'
                elif result == 'fail':
                    return 'apply_fail'
                elif result == 'cancel':
                    return 'cancel'
                else:
                    return 'process_fail'
            else:
                return 'process_fail'

    def wait_consumer(self):
        print('wait_consumer')
        consumer = KafkaConsumer(
            self.requestID,
            bootstrap_servers=[mostDefine.KAFKA_SERVER_URL],
            auto_offset_reset='earliest',
            enable_auto_commit=True,
            group_id=self.requestID,
            value_deserializer=lambda x: loads(x.decode('utf-8')),
            consumer_timeout_ms=1000 * 10)
        print('w-1')
        res = None
        for message in consumer:
            print("Topic: %s, Partition: %d, Offset: %d, Key: %s, Value: %s" %
                  (message.topic, message.partition, message.offset,
                   message.key, message.value))
            res = message.value
            break
        consumer.close()
        return res
def __init__(self, topic: str):
    self.__topic = topic
    self.__servers = settings.KAFKA_HOSTS
    self.__producer = KafkaProducer(bootstrap_servers=self.__servers, retries=5)
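# A note on this design choice: retries=5 lets the client resend a batch after
# transient broker errors. If strict ordering matters too, it is commonly
# paired with the settings sketched below -- an assumption, not part of the
# original snippet:
#
#   self.__producer = KafkaProducer(
#       bootstrap_servers=self.__servers,
#       retries=5,                                # resend on transient errors
#       max_in_flight_requests_per_connection=1,  # keeps retries from reordering
#       acks='all')                               # wait for full ISR acknowledgement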
import datetime
from json import dumps  # used by the producer's value_serializer below

def add_timestamp_to_response(data):
    # this data does not include a timestamp, let's add one
    timestamp = datetime.datetime.now().timestamp()
    data["timestamp"] = timestamp
    return data

def add_otherdata_to_response(data):
    data["otherdata"] = "hello this is a test"
    return data

def send_to_kafka(data):
    producer.send('API-CiscoStock', value=data)

if __name__ == "__main__":
    # included here as it's not available on github
    from kafka import KafkaProducer

    # create a handler for our kafka producer
    producer = KafkaProducer(
        bootstrap_servers=['localhost:9092'],
        value_serializer=lambda x: dumps(x).encode('utf-8'))

    # lets go
    run()
import json

from kafka import KafkaProducer
from kafka import KafkaConsumer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))

data = []
with open(
        '/home/ramya/PISA_Consolidated_Json/part-00000-6c97ad3b-1a5d-43ab-91e9-239ca8c8db62-c000.json',
        'r') as f:
    for line in f:
        print(json.loads(line))
        producer.send('Demo_kafka', json.loads(line))
producer.flush()
import json
import logging
import logging.handlers

from elasticsearch import Elasticsearch
from kafka import KafkaProducer

file_handler = logging.handlers.RotatingFileHandler(
    filename='read_alterts.log', mode='a', encoding='utf-8')
logging.basicConfig(
    handlers=[file_handler],
    level=logging.INFO,
    format='%(asctime)s|%(levelname)s|%(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')

producer = KafkaProducer(
    bootstrap_servers='10.1.11.175:9292,10.1.11.176:9292,10.1.11.177:9292',
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))

es = Elasticsearch(['http://10.1.11.176:19210', 'http://10.1.11.177:19210'])

def get_source(page):
    """
    Fetch the records stored in ES.
    :param page:
    :return:
    """
    records = page['hits']['hits']
    for record in records:
        logging.info(isinstance(record['_source'], dict))
from time import sleep
from json import dumps
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))

for e in range(1000):
    data = {'number': e}
    producer.send('test', key=b'consoledata', value=data)
    sleep(5)
    jsondata = json.loads(message)
    query = "INSERT INTO records (id, time, photo, food, calorie, carbo, protein, fat, fiber)" \
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    session.execute(
        query,
        (jsondata['user'], time, jsondata['drawn_img'], jsondata['class'],
         jsondata['calories'], jsondata['carbo'], jsondata['protein'],
         jsondata['fat'], jsondata['fiber']))

# NOTE: the original dict literal repeated the key "inputImage" three times,
# which Python collapses to the last entry; Spark's createStream expects
# {topic: partition_count}, so this is presumably meant as three partitions.
topic_to_consume = {"inputImage": 3}
topic_for_produce = "outputResult"
kafka_endpoint = "G401:9092,G402:9092,G403:9092,G404:9092,"\
                 "G405:9092,G406:9092,G407:9092,G408:9092,G409:9092,G410:9092,"\
                 "G411:9092,G412:9092,G413:9092,G414:9092,G415:9092"
producer = KafkaProducer(bootstrap_servers=kafka_endpoint)

# Load Spark Context
sc = SparkContext(appName='MultiFood_detection')
ssc = StreamingContext(sc, 0.08)

# Make Spark logging less extensive
log4jLogger = sc._jvm.org.apache.log4j
log_level = log4jLogger.Level.ERROR
log4jLogger.LogManager.getLogger('org').setLevel(log_level)
log4jLogger.LogManager.getLogger('akka').setLevel(log_level)
log4jLogger.LogManager.getLogger('kafka').setLevel(log_level)
logger = log4jLogger.LogManager.getLogger(__name__)

# connect to cassandra; listing two contact points is enough to discover the whole cluster
cluster = Cluster(['G401', 'G402'])
logging.basicConfig(level=logging.INFO)

parser = argparse.ArgumentParser(description='Kafka producer')
parser.add_argument('--host', type=str, default="localhost",
                    help='Kafka host, default: localhost')
parser.add_argument("--port", type=str, default="9092",
                    help="Kafka port, default: 9092")
parser.add_argument("--file", type=str, required=True,
                    help="Path to file. required")
parser.add_argument("--topic", type=str, required=True,
                    help="Kafka topic. required")
args = parser.parse_args()

lines = read_file(args.file)

server = f'{args.host}:{args.port}'
logging.info(f"Connecting to kafka: {server}")
kafka_producer = KafkaProducer(bootstrap_servers=server)

topic = args.topic
logging.info(f"Writing to topic: {topic}")
send_messages(kafka_producer, lines, topic)
logging.info("Stop")
#!/usr/bin/python
# This code reads data from an S3 bucket and
# passes it to a Kafka topic via a KafkaProducer
# Kept essentially identical to
# https://github.com/rkhebel/Insight-DE-2018C-Project/blob/master/kafka/producer.py
# for ease of comparison

from kafka import KafkaProducer
import boto3
import botocore
import pandas as pd

# Producer running on one (and only one) of the Kafka nodes
producer = KafkaProducer(bootstrap_servers='localhost:9092')

s3 = boto3.resource('s3', aws_access_key_id='', aws_secret_access_key='')
bucket = s3.Bucket('deutsche-boerse-xetra-pds')

# Loop through objects. Each object.key is a pointer to a csv file
for object in bucket.objects.all():
    # skip non-trading hours by file size
    # https://github.com/Deutsche-Boerse/dbg-pds#non-trading-hours-vs-missing-data
    if object.size > 136:
        url = 'https://s3.eu-central-1.amazonaws.com/deutsche-boerse-xetra-pds/' + object.key
        data = pd.read_csv(url)
        # read through each line of csv and send the line to the kafka topic
        for index, row in data.iterrows():
            output = ''
            for element in row:
class KafkaKV:
    def __init__(self, inflight_limit, bootstrap_servers, topic, acks):
        self.topic = topic
        self.acks = acks
        self.bootstrap_servers = bootstrap_servers
        self.producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            request_timeout_ms=1000,   # default 30000
            max_block_ms=10000,        # default 60000
            metadata_max_age_ms=5000,  # default 300000
            acks=acks)
        self.offset = None
        self.state = dict()
        self.consumers = []
        self.n_consumers = 0
        self.inflight_limit = inflight_limit
        self.inflight_requests = 0

    def catchup(self, state, from_offset, to_offset, cmd, metrics):
        consumer = None
        tps = None
        cid = None
        init_started = time.time()
        if len(self.consumers) > 0:
            consumer, tps, cid = self.consumers.pop(0)
        else:
            try:
                consumer = KafkaConsumer(
                    client_id=uuid.uuid4(),
                    bootstrap_servers=self.bootstrap_servers,
                    request_timeout_ms=1000,
                    enable_auto_commit=False,
                    auto_offset_reset="earliest")
            except ValueError as e:
                msg = m("Error on creating consumer",
                        type=str(type(e)),
                        msg=str(e),
                        stacktrace=traceback.format_exc()).with_time()
                kafkakv_log.info(msg)
                kafkakv_err.info(msg)
                kafkakv_stdout.info("Error on creating consumer")
                raise RequestTimedout()
            tps = [TopicPartition(self.topic, 0)]
            consumer.assign(tps)
            cid = self.n_consumers
            self.n_consumers += 1
        try:
            metrics["init_us"] = int((time.time() - init_started) * 1000000)
            catchup_started = time.time()

            if from_offset is None:
                consumer.seek_to_beginning(tps[0])
            else:
                consumer.seek(tps[0], from_offset + 1)

            processed = 0
            while consumer.position(tps[0]) <= to_offset:
                rs = consumer.poll()
                if tps[0] not in rs:
                    continue
                for record in rs[tps[0]]:
                    if record.offset > to_offset:
                        break
                    data = json.loads(record.value.decode("utf-8"))
                    processed += 1
                    if "writeID" not in data:
                        continue
                    if "prevWriteID" in data:
                        if data["key"] not in state:
                            continue
                        current = state[data["key"]]
                        if current["writeID"] == data["prevWriteID"]:
                            state[data["key"]] = {
                                "value": data["value"],
                                "writeID": data["writeID"]
                            }
                    else:
                        state[data["key"]] = {
                            "value": data["value"],
                            "writeID": data["writeID"]
                        }

            result = None
            if cmd["key"] in state:
                result = state[cmd["key"]]

            kafkakv_log.info(
                m("caught",
                  cmd=cmd,
                  result=result,
                  base_offset=from_offset,
                  sent_offset=to_offset,
                  processed=processed,
                  cid=cid).with_time())
            metrics["catchup_us"] = int((time.time() - catchup_started) * 1000000)
            self.consumers.append((consumer, tps, cid))
            return state
        except:
            try:
                consumer.close()
            except:
                pass
            raise

    def execute(self, payload, cmd, metrics):
        msg = json.dumps(payload).encode("utf-8")
        offset = self.offset
        state = copy.deepcopy(self.state)

        kafkakv_log.info(m("executing", cmd=cmd, base_offset=offset).with_time())

        send_started = time.time()
        written = None
        try:
            future = self.producer.send(self.topic, msg)
            written = future.get(timeout=10)
        except UnknownTopicOrPartitionError:
            # well that's (phantom) data loss
            # how to repro:
            #   topic has replication factor 3
            #   for each node there is k clients which specifies only it as bootstrap_servers
            #   start workload
            #   wait ~20 seconds, kill leader
            #   wait 5 seconds, restart former leader
            #   observe UnknownTopicOrPartitionError
            raise RequestTimedout()
        except KafkaConnectionError:
            raise RequestTimedout()
        except KafkaTimeoutError:
            raise RequestTimedout()
        except NotLeaderForPartitionError:
            raise RequestCanceled()
        except RequestTimedOutError:
            raise RequestTimedout()
        except KafkaError as e:
            msg = m("Run into an unexpected Kafka error on sending",
                    type=str(type(e)),
                    msg=str(e),
                    stacktrace=traceback.format_exc()).with_time()
            kafkakv_log.info(msg)
            kafkakv_err.info(msg)
            kafkakv_stdout.info("Run into an unexpected Kafka error " +
                                str(type(e)) + ": " + str(e) + " on sending")
            raise RequestTimedout()
        except:
            e, v = sys.exc_info()[:2]
            stacktrace = traceback.format_exc()
            msg = m("Run into an unexpected error on sending",
                    type=str(e),
                    msg=str(v),
                    stacktrace=stacktrace).with_time()
            kafkakv_log.info(msg)
            kafkakv_err.info(msg)
            kafkakv_stdout.info("Run into an unexpected error " + str(e) + ": " +
                                str(v) + " @ " + stacktrace + " on sending")
            raise
        metrics["send_us"] = int((time.time() - send_started) * 1000000)

        kafkakv_log.info(
            m("sent", cmd=cmd, base_offset=offset,
              sent_offset=written.offset).with_time())

        try:
            state = self.catchup(state, offset, written.offset, cmd, metrics)
        except NoBrokersAvailable:
            raise RequestTimedout()
        except UnknownTopic:
            raise RequestTimedout()
        except RequestTimedout:
            raise
        except:
            e, v = sys.exc_info()[:2]
            stacktrace = traceback.format_exc()
            msg = m("Run into an unexpected error on catching up",
                    type=str(e),
                    msg=str(v),
                    stacktrace=stacktrace).with_time()
            kafkakv_log.info(msg)
            kafkakv_err.info(msg)
            kafkakv_stdout.info("Run into an unexpected error " + str(e) + ": " +
                                str(v) + " @ " + stacktrace + " on catching up")
            raise RequestTimedout()

        if self.offset is None or self.offset < written.offset:
            base_offset = self.offset
            self.state = state
            self.offset = written.offset
            kafkakv_log.info(
                m("updated",
                  cmd=cmd,
                  base_offset=offset,
                  root_offset=base_offset,
                  sent_offset=written.offset).with_time())

        return state

    def write(self, key, value, write_id, metrics):
        if self.inflight_limit <= self.inflight_requests:
            raise RequestCanceled()
        try:
            self.inflight_requests += 1
            cmd = {"key": key, "value": value, "writeID": write_id}
            state = self.execute(cmd, cmd, metrics)
            return state[key]
        finally:
            self.inflight_requests -= 1

    def read(self, key, read_id, metrics):
        if self.inflight_limit <= self.inflight_requests:
            raise RequestCanceled()
        try:
            self.inflight_requests += 1
            state = self.execute({}, {"key": key, "read_id": read_id}, metrics)
            return state[key] if key in state else None
        finally:
            self.inflight_requests -= 1

    def cas(self, key, prev_write_id, value, write_id, metrics):
        if self.inflight_limit <= self.inflight_requests:
            raise RequestCanceled()
        try:
            self.inflight_requests += 1
            cmd = {"key": key,
                   "prevWriteID": prev_write_id,
                   "value": value,
                   "writeID": write_id}
            state = self.execute(cmd, cmd, metrics)
            return state[key] if key in state else None
        finally:
            self.inflight_requests -= 1
def main():
    scriptusage = 'ingest.py -r <random-seed> -b <batch-size>'
    randomseed = 34
    batchsize = 300
    m1max = 100
    m2max = 500
    basedelay = 2 * 60 * 1000  # 2 minutes
    aggwindowlength = datetime.timedelta(seconds=5)
    deviceid = str(uuid.uuid4())

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hr:b:", ["random-seed=", "batch-size="])
    except getopt.GetoptError:
        print(scriptusage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(scriptusage)
            sys.exit()
        elif opt in ("-r", "--random-seed"):
            randomseed = int(arg)
        elif opt in ("-b", "--batch-size"):
            batchsize = int(arg)
    print("randomseed={}, batchsize={}".format(randomseed, batchsize))

    # connect to Kafka
    if use_kafka:
        kproducer = KafkaProducer(
            bootstrap_servers=os.environ['KAFKA_ADVERTISED_SERVERS'])
        if use_print:
            print("Connected a producer to Kafka servers: {}".format(
                os.environ['KAFKA_ADVERTISED_SERVERS']))
    else:
        kproducer = None

    # connect to Cassandra
    if use_cassandra:
        ccluster = Cluster(['cassandra1', 'cassandra2', 'cassandra3'])
        csession = ccluster.connect('boontadata')
    else:
        ccluster = None
        csession = None

    numpy.random.seed(randomseed)
    df = pandas.DataFrame({
        'measure1': numpy.random.randint(0, m1max, batchsize),
        'm2r': numpy.random.rand(batchsize),
        'catr': numpy.random.randint(1, 5, batchsize),
        'r1': numpy.random.rand(batchsize),
        'r2': numpy.random.rand(batchsize),
        'r3': numpy.random.rand(batchsize),
        'msgid': numpy.arange(0, batchsize, 1, dtype=int),
        'devicetime': numpy.array([0] * batchsize, dtype=int),
        'sendtime': numpy.array([0] * batchsize, dtype=int),
        'patterncode': numpy.array([''] * batchsize)
    })
    df['category'] = df.apply(lambda row: "cat-{}".format(int(row['catr'])), axis=1)
    df['measure2'] = df.apply(lambda row: row['m2r'] * m2max, axis=1)
    df['messageid'] = df.apply(
        lambda row: "{}-{}".format(deviceid, int(row.msgid)), axis=1)
    df = df.drop(['catr', 'm2r', 'msgid'], axis=1)

    iappend = batchsize
    for i in range(0, batchsize):
        r = df.iloc[i]
        sendtime = int(round(time.time() * 1000))
        patterncode = ''
        if r.r1 < 0.01:
            # late arrival, out of order; may add up to 300 additional seconds
            # to the base delay
            devicetime = int(sendtime - basedelay - int(r.r2 * 1000 * 300))
            patterncode = 'late'  # devicetime < sendtime
        else:
            devicetime = sendtime
        df.loc[i, 'devicetime'] = devicetime
        df.loc[i, 'sendtime'] = sendtime
        df.loc[i, 'patterncode'] = patterncode
        senddata(kproducer, csession, r.messageid, deviceid, devicetime,
                 r.category, r.measure1, r.measure2, sendtime, patterncode)
        if r.r2 < 0.05:
            # resend a previous message
            patterncode = 're'
            resendindex = int(i * r.r1)
            sendtime = int(round(time.time() * 1000))
            rbis = df.iloc[resendindex].copy()
            senddata(kproducer, csession, rbis.messageid, deviceid,
                     rbis.devicetime, rbis.category, rbis.measure1,
                     rbis.measure2, sendtime, patterncode)
            rbis.sendtime = sendtime
            rbis.patterncode = patterncode
            df.loc[iappend] = rbis
            iappend += 1
        time.sleep(r.r3 / 10)

    # wait for all kafka messages to be sent
    if use_kafka:
        kproducer.flush()

    # calculate aggregations from the sender point of view and send them to Cassandra
    df = df.drop(['r1', 'r2', 'r3'], axis=1)
    df['devicetimewindow'] = df.apply(
        lambda row: gettimewindow(row.devicetime / 1000, aggwindowlength), axis=1)
    df['sendtimewindow'] = df.apply(
        lambda row: gettimewindow(row.sendtime / 1000, aggwindowlength), axis=1)
    sendaggdata(
        csession, deviceid, 'devicetime',
        df.query('patterncode != \'re\'').groupby(
            ['devicetimewindow', 'category'])['measure1', 'measure2'].sum())
    sendaggdata(
        csession, deviceid, 'sendtime',
        df.query('patterncode != \'re\'').groupby(
            ['sendtimewindow', 'category'])['measure1', 'measure2'].sum())

    # disconnect from Cassandra
    if use_cassandra:
        ccluster.shutdown()
#!/usr/bin/python
from kafka import KafkaProducer

# named json_record rather than json so it cannot shadow the json module
json_record = '{"user_action_id":{"long":1346801},"user_id":{"long":243008914},"customer_id":{"long":0},"session_id":{"string":"2icprcma5qp6ch52lk6sbm0ag7"},"remote_addr":{"string":"78.96.2.37"},"forwarded_for":{"string":""},"php_self":{"string":"/search-tools/suggest/products/masini%20de/0"},"keywords":{"string":""},"action":{"string":"ignore_Browsing ProductListing Search suggestProducts"},"category_id":{"long":0},"widget_page_id":{"int":0},"brand_id":{"long":0},"products_id":{"long":0},"time":{"long":1446425827000},"data":{"long":1446422400000},"ora":{"long":25027000},"referer":{"string":"http://m.emag.ro/resigilate/telefoane-mobile-accesorii/listall?ref=ps&emag_click_id=d2a1a979295cae63902266599533373b"},"referer_section":{"string":""},"referer_site":{"string":"m.emag.ro"},"user_agent":{"string":"Mozilla/5.0 (Linux; Android 4.4.4; SM-G530FZ Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"},"browser_name":{"string":"Chrome"},"browser_version":{"string":"33"},"operating_system":{"string":"AndroidOS"},"device":{"string":"undefined"},"device_type":{"string":"phone"},"click_to":{"long":3168216612},"link_type":{"int":0},"link_id":{"int":0},"response_code":{"int":200},"id_abonat":{"long":0},"timp_generare":{"string":"1.7870"},"cache_age":{"long":0},"ipREGION":{"string":"ALBA"},"ipCITY":{"string":"BLAJ"},"selectedRegion":{"string":""},"selectedCity":{"string":""},"spider_detection_status":{"int":0},"app_esi_call":{"boolean":false},"hostname":{"string":"m.emag.ro"},"lb":{"string":"lb1.emag.ro"},"ab_option":{"string":""},"set_cookie":{"int":0},"user_remember":{"string":"empty"},"products_status":{"int":0},"info_id":{"int":0},"partner_cookie":{"string":"-"}}'

producer = KafkaProducer(
    bootstrap_servers=
    'instance-18171.bigstep.io:9092,instance-18169.bigstep.io:9092,instance-18170.bigstep.io:9092'
)

for _ in range(100000000):
    producer.send('clickstreamjson', json_record.encode('utf-8'))
#!/usr/bin/env python
from kafka import KafkaProducer
from flask import Flask

app = Flask(__name__)
event_logger = KafkaProducer(bootstrap_servers='kafka:29092')
events_topic = 'events'

@app.route("/")
def default_response():
    event_logger.send(events_topic, 'default'.encode())
    return "This is the default response!"

@app.route("/purchase_a_sword")
def purchase_sword():
    # business logic to purchase sword
    # log event to kafka
    event_logger.send(events_topic, 'purchased_sword'.encode())
    return "Sword Purchased!"
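# To exercise the two endpoints above (assuming Flask's default port 5000):
#
#   flask run    # or: python app.py
#   curl http://localhost:5000/
#   curl http://localhost:5000/purchase_a_sword
#
# each request appends one event message to the 'events' topic.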
app = Flask(__name__)
# credentials and host are masked in the source; the connection string ran into
# the decorator below when the snippet was flattened
client = MongoClient("mongodb://*****:*****@...")

@app.route("/on_time_performance")
def on_time_performance():
    carrier = request.args.get('Carrier')
    flight_date = request.args.get('FlightDate')
    flight_num = request.args.get('FlightNum')
    flight = client.agile_data_science.on_time_performance.find_one({
        'Carrier':
import time
from json import dumps
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))

for odd in range(100000):
    if odd % 2 != 0:
        data = {'B': odd}
        producer.send('numtest1', value=data)
        time.sleep(2)
producer.flush()
from kafka import KafkaProducer
from json import dumps
from time import sleep
from api_request import ApiRequest

# Connect kafka producer
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=lambda x: dumps(x).encode('utf-8'))
print('Connected to Kafka!')

# Stations and their lat/lng for api request
stations = [{
    'station_id': 'SE482',
    'lat': 33.43628,
    'lng': -118.49236
}, {
    'station_id': 'SE687',
    'lat': 34.51605,
    'lng': -120.38485
}, {
    'station_id': 'SE793',
    'lat': 33.78849,
    'lng': -118.37804
}, {
    'station_id': 'SE574',
    'lat': 34.14406,
    'lng': -116.40036
}, {
    'station_id': 'SE283',
    'lat': 34.90743,
    'lng': -118.52388
from kafka import KafkaProducer
import time
from time import sleep
import json

def read_json(json_name):
    data = {}
    with open(json_name, "r") as json_file:
        data = json.load(json_file)
    return data

if __name__ == "__main__":
    producer = KafkaProducer(
        bootstrap_servers=["127.0.0.1:9092"],
        value_serializer=lambda x: json.dumps(x).encode('utf-8'))
    count = 10
    i = 0
    while i < count:
        data = read_json('mobilityoperation.json')
        strategy_params = ("msg_count:" + str(i) +
                           ",access: 0,max_accel:1.500000,max_decel: -1.000000,"
                           "react_time: 4.500000, min_gap: 5.000000, depart_pos: " +
                           str(i) + ", turn_direction:straight")
        data["strategy_params"] = strategy_params
        timestamp = int(data["metadata"]["timestamp"]) + i
        data["metadata"]["timestamp"] = str(timestamp)
        producer.send('v2xhub_mobility_operation_in', value=data)
        print('Sent a mobilityoperation.')
        i += 1
    producer.flush()
def __init__(self, kafka_host, kafka_topic):
    self.kafka_host = kafka_host
    self.kafka_topic = kafka_topic
    self.producer = KafkaProducer(
        bootstrap_servers=self.kafka_host,
        reconnect_backoff_ms=reconnect_backoff_ms_value)
span_name="fetch-price", transport_handler=http_transport_handler, sample_rate=100.0): data = fetch_price(stock) if data: send2_kafka(producer, data) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('symbol', help='the symbol of the stock') parser.add_argument('topic_name', help='the name of the topic') parser.add_argument('kafka_broker', help="the location of the kafka") args = parser.parse_args() symbol = args.symbol topic_name = args.topic_name kafka_broker = args.kafka_broker producer = KafkaProducer(bootstrap_servers=kafka_broker) #stock = get_quote(symbol) schedule.every(1).second.do(fetch_price_and_send, producer, symbol) atexit.register(shutdown_hook, producer) while True: schedule.run_pending() time.sleep(1)
from kafka import KafkaProducer
import requests, json, datetime

url = 'http://hq.sinajs.cn/list=sh600000,sh600008,sh600009,sh600010,sh600011,sh600015,sh600016,sh600018,sh600019,sh600021,sh600023,sh600028,sh600029,sh600030,sh600031,sh600036,sh600038,sh600048,sh600050,sh600061,sh600066,sh600068,sh600074,sh600085,sh600089,sh600100,sh600104,sh600109,sh600111,sh600115,sh600118,sh600153,sh600157,sh600170,sh600177,sh600188,sh600196,sh600208,sh600219,sh600221,sh600233,sh600271,sh600276,sh600297,sh600309,sh600332,sh600340,sh600352,sh600362,sh600369,sh600372,sh600373,sh600376,sh600383,sh600390,sh600406,sh600415,sh600436,sh600482,sh600485,sh600489,sh600498,sh600518,sh600519,sh600522,sh600535,sh600547,sh600549,sh600570,sh600583,sh600585,sh600588,sh600606,sh600637,sh600649,sh600660,sh600663,sh600674,sh600682,sh600685,sh600688,sh600690,sh600703,sh600704,sh600705,sh600739,sh600741,sh600795,sh600804,sh600816,sh600820,sh600827,sh600837,sh600871,sh600886,sh600887,sh600893,sh600895,sh600900,sh600909,sh600919,sh600926,sh600958,sh600959,sh600977,sh600999,sh601006,sh601009,sh601012,sh601018,sh601021,sh601088,sh601099,sh601111,sh601117,sh601118,sh601155,sh601163,sh601166,sh601169,sh601186,sh601198,sh601211,sh601212,sh601216,sh601225,sh601228,sh601229,sh601288,sh601318,sh601328,sh601333,sh601336,sh601375,sh601377,sh601390,sh601398,sh601555,sh601600,sh601601,sh601607,sh601608,sh601611,sh601618,sh601628,sh601633,sh601668,sh601669,sh601688,sh601718,sh601727,sh601766,sh601788,sh601800,sh601818,sh601857,sh601866,sh601872,sh601877,sh601878,sh601881,sh601888,sh601898,sh601899,sh601901,sh601919,sh601933,sh601939,sh601958,sh601966,sh601985,sh601988,sh601989,sh601991,sh601992,sh601997,sh601998,sh603160,sh603799,sh603833,sh603858,sh603993,sz000001,sz000002,sz000008,sz000060,sz000063,sz000069,sz000100,sz000157,sz000166,sz000333,sz000338,sz000402,sz000413,sz000415,sz000423,sz000425,sz000503,sz000538,sz000540,sz000559,sz000568,sz000623,sz000625,sz000627,sz000630,sz000651,sz000671,sz000686,sz000709,sz000723,sz000725,sz000728,sz000738,sz000750,sz000768,sz000776,sz000783,sz000792,sz000826,sz000839,sz000858,sz000876,sz000895,sz000898,sz000938,sz000959,sz000961,sz000963,sz000983,sz001979,sz002007,sz002008,sz002024,sz002027,sz002044,sz002065,sz002074,sz002081,sz002142,sz002146,sz002153,sz002174,sz002202,sz002230,sz002236,sz002241,sz002252,sz002292,sz002294,sz002304,sz002310,sz002352,sz002385,sz002411,sz002415,sz002424,sz002426,sz002450,sz002456,sz002460,sz002465,sz002466,sz002468,sz002470,sz002475,sz002500,sz002508,sz002555,sz002558,sz002572,sz002594,sz002601,sz002602,sz002608,sz002624,sz002673,sz002714,sz002736,sz002739,sz002797,sz002831,sz002839,sz002841,sz300003,sz300015,sz300017,sz300024,sz300027,sz300033,sz300059,sz300070,sz300072,sz300122,sz300124,sz300136,sz300144,sz300251,s300315'

response = requests.get(url)
current_date = datetime.datetime.now().strftime("%Y-%m-%d")

producer = KafkaProducer(
    bootstrap_servers=[
        'hdp2.domain:6667', 'hdp3.domain:6667', 'hdp4.domain:6667'
    ],
    api_version=(0, 10, 1),
    value_serializer=lambda v: json.dumps(v).encode('utf-8'))

for line in response.content.decode('gb2312').split(';')[:-1]:
    content = line.split('_')[2].replace(r'="', ',').replace(r'"', '')
    if len(content.split(',')) > 30 and content.split(',')[31] == current_date:
        producer.send('stock-mins', value=content)
        return 1

    def on_status(self, status):
        return 1

    def on_error(self, status):
        print(status)

def twitt_stream(kafka_producer, topic):
    # write tweepy function
    common_time = time.time()  # record the time we open the tweets stream
    listener = StdOutListener(int(common_time), kafka_producer, topic)
    auth = OAuthHandler("PQEim5Uq9jFq3YiMGF12CS7oz",
                        "8gYnr83KbscFqaqE0I5vvGKIjehcVXwGvd43fvR7UL2iEpzhyE")
    auth.set_access_token("1065559266784878592-9VP0iOYDmVzkD84iaEKNVZHk0jb6fi",
                          "9qETNhtRPrN02QpG4yyTqZnj101HqYPQXViVO5veWm964")
    stream = Stream(auth, listener)
    stream.filter(languages=["en"], track=['btc', 'bitcoin', 'BitCoin', 'cryptocurrency'])

if __name__ == "__main__":
    topic = sys.argv[1]
    # create kafka producer instance
    producer = KafkaProducer(bootstrap_servers=['localhost:9092'], api_version=(0, 1, 0))
    twitt_stream(producer, topic)
def create_producer(self):
    if self.config.get("OUT_TOPIC"):
        self.producer = KafkaProducer(
            linger_ms=50,
            bootstrap_servers=[self.config["KAFKA_BROKER_URL"]])
from kafka import KafkaProducer
import json

# Greg - before starting - pip install slackclient
# use the synchronous WebClient; AsyncWebClient returns coroutines, which
# would need an event loop around the api_call below
from slack import WebClient as wc

# Greg -- please change to the IP of your KAFKA cluster and port please
producer = KafkaProducer(bootstrap_servers='localhost:9092')

# Greg -- Please use your OAuth token
tk = "PLEASE HARD CODE YOUR TOKEN HERE"
chan = wc(tk)

message_list = chan.api_call("channels.history",
                             params={"channel": "random",
                                     "oldest": 0,
                                     "count": "1000"})
for m in message_list["messages"]:
    # messages are dicts; serialize to bytes before sending
    producer.send('random', json.dumps(m).encode('utf-8'))
producer.flush(30)
def open_spider(self, spider):
    self.producer = KafkaProducer(
        bootstrap_servers=['sentiment01:9092', 'sentiment03:9092'],
        value_serializer=lambda m: json.dumps(m).encode('ascii'))