def nextTuple(self):
    race_time = self.myindex  # one record per second
    # Sleep to simulate the event; it replays roughly 3x faster than the real race.
    # We need at least 80 seconds of data before anomaly detection can start,
    # so don't slow down the first 80 seconds.
    if race_time > 80:
        time.sleep(0.35)
    row_data = self.data.loc[self.myindex].values
    # We can only emit a flat list; a list inside a list is not allowed.
    # Example emit_data: ['word', 9, 74.08, 75.02, 73.76, 77.67, 81.24, 74.63, 76.59, 74.61, 72.88, 71.91]
    # The text 'word' does not matter. With multiple bolt instances, tuples are
    # partitioned across instances based on the text (for instance: 'apple' to
    # bolt_instance1, 'banana' to bolt_instance2). Emitting a constant text sends
    # everything to a single bolt, so I don't have to worry about data order.
    emit_data = ["word"]
    emit_data.append(race_time)
    emit_data = emit_data + row_data.tolist()
    self.myindex += 1
    # start from the beginning if the race ends
    if self.myindex == len(self.data):
        self.myindex = 0
    storm.logInfo("Emitting %s" % str(emit_data))
    storm.emit(emit_data)

def process(self, tup):
    '''
    TODO:
    Task: keep track of the top N words
    Hint: implement an efficient algorithm so the bolt is not shut down before
    the task finishes; the reference implementation used for the auto-grader
    maintains an N-sized min-heap.
    '''
    word = tup.values[0]
    count = float(tup.values[1])
    new_word_count = WordCount(word, count)
    if word in self._top_N_map:
        # word already tracked: update its count and restore the heap invariant
        if count > self._top_N_map[word].count:
            self._top_N_map[word].count = count
            heapq.heapify(self._top_N_heap)
    # add new elements while the heap holds fewer than N entries
    elif len(self._top_N_heap) < self._N:
        self._top_N_map[word] = new_word_count
        heapq.heappush(self._top_N_heap, new_word_count)
    # otherwise replace the smallest word if the new count is larger
    else:
        smallest_word_count = self._top_N_heap[0]
        if count > smallest_word_count.count:
            del self._top_N_map[smallest_word_count.word]
            self._top_N_map[word] = new_word_count
            heapq.heapreplace(self._top_N_heap, new_word_count)
    storm.logInfo("Add word: %s, count: %d" % (word, count))
    storm.emit(["top-N", self.printvalues()])

def process(self, tup):
    # Load data from tuple
    data = tup.values[0]

    # Analyze data: update the running overall sentiment as an exponentially
    # weighted average, with a per-source weight
    if data['source'] == 'twitter':
        weight = .995
        sentiment = data['data']['sentiment']
        overall_sentiment[0] = overall_sentiment[0] * weight + sentiment * (1 - weight)
    elif data['source'] == 'reddit':
        weight = .85
        sentiment = data['data']['sentiment']
        overall_sentiment[0] = overall_sentiment[0] * weight + sentiment * (1 - weight)
    elif data['source'] == 'news':
        weight = .85
        sentiment = data['data']['sentiment']
        overall_sentiment[0] = overall_sentiment[0] * weight + sentiment * (1 - weight)

    # Get today's date
    today = date.today()

    # Store analyzed results in DynamoDB
    table = dynamodb.Table(config['dynamodb']['analysis'])
    table.put_item(
        Item={
            'date': str(today),
            'timestamp': str(time.time()),
            'sentiment': Decimal(str(overall_sentiment[0]))
        })

    # Emit for downstream bolts
    storm.emit([data])

def nextTuple(self):
    time.sleep(0.2)
    # TODO
    # Task: randomly generate a sentence from the sentences string array
    sentence = random.choice(SENTENCE)
    storm.logInfo("Emitting %s" % sentence)
    storm.emit([sentence])

def process(self, tup):
    # TODO:
    # Task: keep track of the top N words
    word = tup.values[0]
    count = int(tup.values[1])
    new_word_count = WordCountTuple(word, count)
    if word in self._top_N_map:
        if count > self._top_N_map[word].count:
            self._top_N_map[word].count = count
            heapq.heapify(self._top_N_heap)
            storm.logInfo("Update word: %s, count: %d" % (word, count))
    elif len(self._top_N_heap) < self._N:
        self._top_N_map[word] = new_word_count
        heapq.heappush(self._top_N_heap, new_word_count)
        storm.logInfo("Add word: %s, count: %d" % (word, count))
    else:
        smallest_word_count = self._top_N_heap[0]
        storm.logInfo(
            "Current smallest word: %s, count: %d" %
            (smallest_word_count.word, smallest_word_count.count))
        if count > smallest_word_count.count:
            del self._top_N_map[smallest_word_count.word]
            self._top_N_map[word] = new_word_count
            heapq.heapreplace(self._top_N_heap, new_word_count)
            storm.logInfo("Add word: %s, count: %d" % (word, count))
    storm.logInfo("Top N: %s" % self.report())
    storm.emit(["top-N", self.report()])

def process(self, tup):
    # Load data from tuple
    data = tup.values[0]
    data = json.loads(data)

    # Get today's date
    today = date.today()

    # Analyze data
    sentiment = get_sentiment_score(data['title'], data['description'])

    # Store analyzed results in DynamoDB
    table = dynamodb.Table(config['dynamodb']['news'])
    parsed_data = {
        'date': str(today),
        'timestamp': str(data['publishedAt']),
        'title': data['title'],
        'description': data['description'] if data['description'] != '' else ' ',
        'sentiment': Decimal(str(sentiment))
    }
    table.put_item(Item=parsed_data)

    # Emit for downstream bolts
    storm.emit([{'source': 'news', 'data': parsed_data}])

def nextTuple(self):
    docs = self.get_docs()
    uuid = docs[0]['uuid']
    # drop the first 18 characters of the href (fixed-length URL prefix)
    tup = [docs[0]['href'][18:]]
    # remember the tuple so fail() can replay it by message id
    self.buffer[uuid] = (tup, 0)
    emit(tup, id=uuid)
    sleep(1.0)

def process(self, tup):
    data = tup.values[0]
    # the tuple carries a stringified Python dict; parse it back into a dict
    tweet = ast.literal_eval(data)
    output = real_time_batch_processing(tweet)
    table = conn_db.Table("realtime_db")
    table.put_item(Item=output)
    storm.emit([output])

def process(self, tup):
    if tup.values:
        words = tup.values[0]
        if words:
            storm.emit([words])

def process(self, tup):
    tweet = tup.values[0]
    # rename the 'text' field and attach a hate-word score
    tweet['tweettext'] = tweet['text']
    del tweet['text']
    tweet['sentimentscore'] = str(
        self.checkForHateWords(tweet['tweettext']))
    storm.emit([tweet])

def process(self, tup):
    text = tup.values[1]
    #language = langid.classify(text)[0]
    #l = LangID()
    #l.train()
    language = self.l.classify(text)
    storm.emit([tup.values[0], language])

def process(self, tup):
    segments = tup.values[1].rstrip('/').rsplit('/', 1)
    path = segments[0] if self.match(segments[-1]) else '/'.join(segments)
    event = dict(
        timestamp=tup.values[0],
        path=path
    )
    kwargs = dict(
        id=tup.values[2]
    )
    try:
        # TODO: Retrieve users from all indices.
        events = self.es.get(self.index, kwargs['id'], 'user', preference='_primary')
        kwargs['version'] = events['_version']
        body = {'events': events['_source']['events'] + [event]}
    except NotFoundError:
        kwargs['op_type'] = 'create'
        body = {'events': [event]}
    try:
        body['rank'] = math.log10(len(body['events'])) / 2
        self.es.index(self.index, 'user', body, **kwargs)
        paths = list(event['path'] for event in body['events'])
        emit([kwargs['id'], paths])
        ack(tup)
    except TransportError:
        fail(tup)

def process(self, tup):
    f = open("/root/Japanese.txt")
    self.stopwords = f.read().split('\n')
    f.close()
    self.m = MeCab.Tagger("-Ochasen")
    r = redis.Redis(host='localhost', port=6379, db=0)
    pid = os.getpid()
    base_path = '/var/log/takatoshi/'
    logging.basicConfig(filename=base_path + __file__ + '.log', level=logging.DEBUG)
    logging.debug(datetime.now())
    logging.debug("abs path of py file: " + os.path.abspath(__file__))
    sentence = tup.values[0]
    ts = tup.values[1]
    res = self.m.parse(sentence).splitlines()[:-1]
    output = set()
    for _ in res:
        fa = _.split('\t')
        # keep only nouns (名詞) that are not in the stopword list
        if u"名詞" in fa[3] and not fa[0] in self.stopwords:
            output.add(fa[0])
    if len(output):
        r.incr('total_' + str(ts))
    for _ in output:
        logging.debug("getting noun : " + _)
        storm.emit([_, ts])

def __do_emit_updated(self, is_updated=True):
    #if not self.last_output or is_updated:
    #    self.last_output = ', '.join(list(self.the_top.keys()))
    #    storm.emit(['top-N', self.last_output])
    #output = ', '.join(list(self.the_top.keys()))
    output = ', '.join([cand.word for cand in self.top_cands])
    storm.emit(['top-N', output])

def process(self, tup):
    sentence = tup.values[0]
    # get rid of punctuation
    sentence = re.sub(r"[,.;!\?]", "", sentence)
    words = jieba.cut(sentence, cut_all=True)
    for word in words:
        storm.emit([word])

def process(self, tup):
    # IDEA: do language classification here already?
    # IDEA: keep classifiers for several languages in a map
    # IDEA: batch N classifications (of the same language) for speed?
    url, meta, text, content, outlinks = tup.values
    metadata = Metadata(meta)
    out = [url, metadata, text, content, outlinks]
    try:
        lang = metadata['n52.language'][0]
    except KeyError:
        lang = 'unknown'
    if lang != 'en':
        msg = 'ignoring tuple, as document language {} is not supported'.format(lang)
        logging.debug(msg)
        storm.logDebug(msg)
    else:
        confidence, clazz = self.classify([text])[0]
        metadata["n52.classify.class"] = clazz
        metadata["n52.classify.confidence"] = confidence
        logging.debug([confidence, clazz, text[100:200], url])
        storm.emit(out, anchors=[tup])

def process(self, tup):
    words = tup.values[0].split('.')
    cluster, user, job, task, pid, cpu, mem = tuple(words)
    storm.emit(words)
    logger.info(
        'cluster:%s,username:%s,jobname:%s,taskname:%s,pid:%s,cpu:%s,mem:%s',
        cluster, user, job, task, pid, cpu, mem)

def nextTuple(self):
    try:
        html = urllib2.urlopen(self.url_animals_tl).read()
        soup = BeautifulSoup(html)
        scripts = soup.find_all('script')
        code = scripts[len(scripts) - 1]
        content = code.contents[0].strip()
        prefix = 'P.start.start('
        i = content.find(prefix)
        if i != -1:
            json_str = content[i + len(prefix):len(content) - 2]
            json_obj = json.loads(json_str)
            pins = json_obj['tree']['children'][3]['children'][0]['children'][0]['children']
            for pin in pins:
                pin_id = pin['options']['pin_id']
                if 'module' in pin['children'][1]['options']:
                    module = pin['children'][1]['options']['module']
                orig_link = pin['data']['link']
                orig_host = urlparse(pin['data']['link']).hostname
                images = pin['data']['images']
                if 'orig' in images:
                    pass
                storm.emit([pin_id, orig_link, orig_host,
                            json.dumps(pin, indent=4, sort_keys=True),
                            self.category])
        time.sleep(2)
    except StopIteration:
        pass
    except urllib2.HTTPError, err:
        if err.code == 404:
            pass

def process(self, tup):
    # Split the inbound sentence at spaces
    words = tup.values[0].split(" ")
    # Loop over words and emit
    for word in words:
        storm.logInfo("Emitting %s" % word)
        storm.emit([word])

def process(self, tup):
    # storm.log("Dispel4Py ------> %s: Received block." % (self.script.id, ))
    try:
        inputname = self.inputmapping[tup.component][tup.stream]
        storm.log("Dispel4Py ------> %s: Received block at input '%s'" % (self.script.id, inputname, ))
        # inputs = tup.values
        inputs = decode_types(tup.values)
        outputs = self.script.process({inputname: inputs})
        # storm.log("Dispel4Py ------> %s: Processing complete." % self.scriptname)
        if outputs is None:
            return
        for streamname, output in outputs.iteritems():
            result = output if isinstance(output, list) else [output]
            try:
                storm.emit(result, stream=streamname)
                storm.log("Dispel4Py ------> %s: Emitted to stream %s: %s" % (self.script.id, streamname, str(result)[:200]))
            except TypeError:
                # encode manually
                encoded = encode_types(result)
                storm.emit(encoded, stream=streamname)
                storm.log("Dispel4Py ------> %s: Emitted to stream %s" % (self.script.id, streamname))
    # except:
    #     storm.log("%s: %s" % (self.script.id, traceback.format_exc()))
    except:
        storm.log("Dispel4Py ------> %s: %s" % (self.script.id, traceback.format_exc(), ))

def process(self, tup):
    word = tup.values[0]
    if random() < 0.75:
        storm.emit([word + 'lalala'], anchors=[tup])
        storm.ack(tup)
    else:
        storm.log(word + ' randomly skipped!')

def process(self, tup):
    race_time = tup.values[1]
    row_data = [tup.values[i + 2] for i in range(10)]
    storm.logInfo("Inference Bolt data: race_time: %s, data: %s" % (str(race_time), str(row_data)))
    # restart the event
    if race_time == 0:
        self.speed_data = []
        self.next_step_data = row_data
    # we need at least 80 seconds of data to start detection
    elif race_time <= self.time_step:
        self.speed_data.append(self.next_step_data)
        self.next_step_data = row_data
    # normal case
    else:
        # slide the window: append the newest record and drop the oldest
        self.speed_data.append(self.next_step_data)
        self.speed_data.pop(0)
        self.next_step_data = row_data
        input_data = np.expand_dims(
            self.scaler.transform(np.array(self.speed_data).T), 2)
        prediction = self.scaler.inverse_transform(
            self.model.predict(input_data))
        anomaly_score = np.abs(prediction[:, 0] - np.array(self.next_step_data)).tolist()
        #storm.logInfo("race_time:%s speed:%s anomaly_score %s" % (str(race_time), str(self.next_step_data), str(anomaly_score)))
        emit_data = ["word"]
        emit_data.append(race_time)
        emit_data = emit_data + self.next_step_data + anomaly_score
        storm.logInfo("Inference Bolt emitting: %s" % str(emit_data))
        storm.emit(emit_data)

def process(self, tup):
    # Load data from tuple
    data = tup.values[0]
    data = json.loads(data)

    # Initialize if deque is empty
    if len(past_prices) == 0:
        past_prices.extend([float(data['price'].replace(',', ''))] * LOOK_BACK)
    # Append to rolling deque and compute price
    else:
        past_prices.append(float(data['price'].replace(',', '')))

    # Check if enough elements exist
    if len(past_prices) == LOOK_BACK:
        prediction = predict(MODEL, np.array(past_prices).reshape(1, LOOK_BACK))

        # Store predicted results in DynamoDB
        table = dynamodb.Table(config['dynamodb']['prediction'])
        table.put_item(
            Item={
                'timestamp': shift_future(data['timestamp'], FUTURE_OFFSET),
                'price': Decimal(str(prediction)),
            })

    # Emit for downstream bolts
    storm.emit([data])

def process(self, tup):
    dict_data = tup.values[0]
    sensor_type = tup.values[1]
    ordered_dict = self.order_dict(dict_data)
    # convert to lists
    values_list = list(ordered_dict.values())
    time_list = list(ordered_dict.keys())
    # training length
    train_len = int(len(values_list) * 0.85)
    # prediction length
    predict_len = int(len(values_list) * 0.15)
    # training data
    train_values = values_list[:train_len]
    # training time series
    train_time = time_list[:train_len]
    # prediction time series
    predict_time = time_list[-predict_len:]
    # convert lists to ndarrays
    nd_values = np.array(train_values, dtype=np.float32)
    nd_time = np.array(train_time, dtype=np.float32)
    # train and predict
    predict = self.train_predict(nd_time, nd_values, predict_len)
    # convert to dict (for JSON serialization)
    predict_dict = dict(
        zip(predict_time, map(lambda x: round(x, 4), predict[0])))
    storm.emit([dict_data, predict_dict, sensor_type])

def process(self, tup):
    word = tup.values[0]
    if random() < 0.75:
        storm.emit([word + 'lalala'], anchors=[tup])
        storm.ack(tup)
    else:
        storm.log(word + ' randomly skipped!')

def nextTuple(self):
    try:
        input_tuple = None
        try:
            if self.counter >= self.script._num_iterations:
                return
        except:
            try:
                input_tuple = self.script._static_input.pop(0)
            except AttributeError:
                # there is no static input
                pass
            except IndexError:
                # static input is empty - no more processing
                return
        outputs = self.script.process(input_tuple)
        if outputs is None:
            return
        for streamname, output in outputs.iteritems():
            result = output if isinstance(output, list) else [output]
            storm.emit(result, stream=streamname, id=self.counter)
            storm.log("Dispel4Py ------> %s: emitted tuple %s to stream %s" % (self.script.id, result, streamname))
        self.counter += 1
    except:
        # logging the error but it should be passed to client somehow
        storm.log("Dispel4Py ------> %s: %s" % (self.scriptname, traceback.format_exc(), ))

def process(self, tuple):
    '''
    Must fulfil the following contract expressed in the Java wrapper:

    declarer.declare(new Fields(TopologyFields.AUTHOR_SCREEN_NAME,
        TopologyFields.CREATED_AT, TopologyFields.FAV_COUNT,
        TopologyFields.HASHTAGS_TEXTS, TopologyFields.IN_REPLY_TO_SCREEN_NAME,
        TopologyFields.LANG, TopologyFields.RETWEET_COUNT,
        TopologyFields.RETWEETED, TopologyFields.SOURCE,
        TopologyFields.PLACE, TopologyFields.POSSIBLY_SENSITIVE,
        TopologyFields.TEXT, TopologyFields.TOPIC_NAME));
    '''
    place, topic_name, query = tuple.values
    try:
        tweets = list(get_tweets.get_tweets_for_trends(
            self._twitter_api, [{"query": query}], popular=True,
            tweet_processor=self._storm_tweet_processor))[0]["tweets"]
    except tweepy.TweepError as te:
        # We have hit the Twitter REST API rate limit
        # (https://dev.twitter.com/docs/rate-limiting/1.1); no more tweets for a while
        log_tweeter_error(te, sleep_time=self._rate_limit_sleep_time)
        return
    for pt in tweets:
        # Add the trending topic name, and take the place name from the names used
        # internally by get_tweets instead of the place names returned by Twitter
        tup = [pt['author_screen_name'], pt['created_at'], pt['favorite_count'],
               pt['hashtags_texts'], pt['in_reply_to_screen_name'], pt['lang'],
               pt['retweet_count'], pt['retweeted'], pt['source'], place,
               pt['possibly_sensitive'], pt['text'], topic_name]
        storm.emit(tup)

def nextTuple(self):
    # pause for a while (set the status flag)
    time.sleep(15)
    batch = 10
    bases = ts.get_stock_basics()
    code_list = bases.index
    total = len(code_list)
    batch_size = total // batch
    pool = multiprocessing.Pool(processes=batch)
    results = []
    for i in range(batch + 1):
        begin_index = i * batch_size
        end_index = (i + 1) * batch_size
        if end_index > total:
            end_index = total
        batch_data = code_list.tolist()[begin_index:end_index]
        res = pool.apply_async(ts.get_realtime_quotes, (batch_data, ))
        results.append(res)
        # get_stock_hist_data_batch(code_list=batch_data, start=start, end=end, sh_df=sh_df, sz_df=sz_df, cyb_df=cyb_df, table_name=table_name)
    pool.close()
    pool.join()  # wait for all batches to finish
    for item in results:
        # apply_async returns an AsyncResult; get() yields the quotes DataFrame
        for i, row in item.get().iterrows():
            code = row['code']
            sentence = random.choice(SENTENCES)
            storm.logInfo("Emitting %s" % sentence)
            storm.logInfo("Emitting code:%s row:%s" % (code, row))
            storm.emit([code, row])

def emit(self):
    if not self.entity:
        self.entity = self.getLastEntity()
    # We always emit tuple = (entity, "{json string}")
    storm.emit([self.entity, json.dumps(self.d)])

def process(self, tuple):
    id_tweet, text = tuple.values
    storm.logInfo("LT3BOLTINFO")
    storm.logInfo(text)
    json = get_res(text.encode('utf-8'))
    ''.join(json)
    json = json.split('\n')[-2]
    json_string = json.replace("'", '"')
    data = simplejson.loads(json_string)
    data['id'] = str(id_tweet)
    data['source'] = "LT3"
    data['info'] = text
    if data['relevance_boolean'] == 1 and data['severity_boolean'] == 1:
        data['flag'] = "LT3"
    else:
        data['flag'] = "none"
    del data['relevance_boolean']
    del data['severity_boolean']
    json_string = simplejson.dumps(data)
    storm.emit([json_string])

def nextTuple(self):
    try:
        input_tuple = None
        try:
            input_tuple = self.script._static_input.pop(0)
        except AttributeError:
            # there is no static input
            if self.counter >= self.script._num_iterations:
                return
        except IndexError:
            # static input is empty - no more processing
            return
        storm.log("Dispel4Py ------> %s: input %s" % (self.scriptname, input_tuple, ))
        outputs = self.script.process(input_tuple)
        if outputs is None:
            return
        for streamname, output in outputs.iteritems():
            result = output if isinstance(output, list) else [output]
            storm.emit(result, stream=streamname, id=self.counter)
            storm.log(
                "Dispel4Py ------> %s: emitted tuple %s to stream %s" %
                (self.script.id, result, streamname))
        self.counter += 1
    except:
        # logging the error but it should be passed to client somehow
        storm.log("Dispel4Py ------> %s: %s" % (self.scriptname, traceback.format_exc(), ))

def process(self, tup):
    # TODO: Add check for empty values
    if tup.values[0]:
        words = tup.values[0].split(" ")
        if words:
            for word in words:
                storm.emit([word])

def process(self, tup):
    # storm.log("Dispel4Py ------> %s: Received block." % (self.script.id, ))
    try:
        inputname = self.inputmapping[tup.component][tup.stream]
        storm.log("Dispel4Py ------> %s: Received block at input '%s'" % (self.script.id, inputname, ))
        # inputs = tup.values
        inputs = decode_types(tup.values)
        outputs = self.script.process({inputname: inputs})
        # storm.log("Dispel4Py ------> %s: Processing complete." % self.scriptname)
        if outputs is None:
            return
        for streamname, output in outputs.iteritems():
            result = output if isinstance(output, list) else [output]
            try:
                storm.emit(result, stream=streamname)
                storm.log(
                    "Dispel4Py ------> %s: Emitted to stream %s: %s" %
                    (self.script.id, streamname, str(result)[:200]))
            except TypeError:
                # encode manually
                encoded = encode_types(result)
                storm.emit(encoded, stream=streamname)
                storm.log("Dispel4Py ------> %s: Emitted to stream %s" % (self.script.id, streamname))
    # except:
    #     storm.log("%s: %s" % (self.script.id, traceback.format_exc()))
    except:
        storm.log("Dispel4Py ------> %s: %s" % (self.script.id, traceback.format_exc(), ))

def process(self, tup):
    url = tup.values[0]
    storm.log("HARing " + url)
    output = self.get_har_with_image(url)
    if output != "FAIL":
        storm.emit(output, anchors=[tup])
    storm.ack(tup)

def process(self, tup):
    sentence = tup.values[0].lower()
    words = sentence.split(" ")
    # build a bag-of-words vector over a vocabulary of size self._n
    bow = numpy.zeros((self._n,))
    for w in words:
        bow[self.map[w]] += 1
    storm.emit([bow.tolist()])

def nextTuple(self):
    id = "periodic_%s" % str(uuid4())
    body = json.dumps({'message_id': CHECK_METRIC_ALARM_MSG_ID})
    message = "Periodic monitoring message sent [%s] %s"
    self.log(message % (id, body))
    emit([None, body], id=id)
    time.sleep(60)

def process(self, tup):
    # Split the inbound sentence at spaces
    words = tup.values[0].split(" ")
    # Loop over words and emit
    for word in words:
        storm.logInfo("Emitting-----> %s" % word)
        storm.emit([word])

def process(self, tup):
    segments = tup.values[1].rstrip('/').rsplit('/', 1)
    path = segments[0] if self.match(segments[-1]) else '/'.join(segments)
    event = dict(timestamp=tup.values[0], path=path)
    kwargs = dict(id=tup.values[2])
    try:
        # TODO: Retrieve users from all indices.
        events = self.es.get(self.index, kwargs['id'], 'user', preference='_primary')
        kwargs['version'] = events['_version']
        body = {'events': events['_source']['events'] + [event]}
    except NotFoundError:
        kwargs['op_type'] = 'create'
        body = {'events': [event]}
    try:
        body['rank'] = math.log10(len(body['events'])) / 2
        self.es.index(self.index, 'user', body, **kwargs)
        paths = list(event['path'] for event in body['events'])
        emit([kwargs['id'], paths])
        ack(tup)
    except TransportError:
        fail(tup)

def process(self, tup):
    # ********** Read input arguments **********
    try:
        input_args = json.loads(tup.values[0])
    except:
        input_args = tup.values[0]
    ret_info = tup.values[1]

    # ********** Calculate results **********
    # Construct required variables
    if "query_type" not in input_args:
        return storm.emit([json.dumps({"ok": False, "msg": "No `query_type` param provided!"}), ret_info])
    query_type = input_args["query_type"].lower()
    if query_type == "collections":
        answer = self.handler.describe_collections('cenote', input_args["PROJECT_ID"])
        return storm.emit([json.dumps({"ok": True, "msg": answer}), ret_info])
    timeframe_start = ""
    timeframe_end = ""
    if "timeframe_start" in input_args:
        timeframe_start = input_args["timeframe_start"]
    if "timeframe_end" in input_args:
        timeframe_end = input_args["timeframe_end"]
    info = {
        "cenote": {
            "url": "/projects/" + input_args["PROJECT_ID"] + "/queries/" +
                   input_args["event_collection"] + "/extraction",
            "timeframe_start": timeframe_start,
            "timeframe_end": timeframe_end
        }
    }
    columns = None
    if "target_property" in input_args:
        columns = input_args["target_property"].split(",")

    # Execute corresponding query
    if query_type == "extraction":
        answer = self.reader.read_data("cenote", columns, json.dumps(info))
    elif query_type in ["count", "min", "max", "sum", "average", "median"]:
        answer = self.reader.perform_operation("cenote", columns, query_type, json.dumps(info))
    elif query_type == "percentile":
        info["cenote"]["percentile"] = int(input_args["percentile"])
        answer = self.reader.perform_operation("cenote", columns, query_type, json.dumps(info))
    else:
        answer = {"data": "Not implemented yet!"}

    # Return results
    if "response" in answer and answer["response"] == 200:
        # Hack-ia to turn "system.<someoperation>(<column>)" to "<column>"
        answer = json.loads(re.sub(r'system\.\w*\(|\)', "", json.dumps(answer, default=datetimeParser)))
        return storm.emit([json.dumps({"ok": True, "msg": answer["data"]}), ret_info])
    else:
        try:
            problem = answer["exception"]
        except:
            problem = answer["data"]
        return storm.emit([json.dumps({"ok": False, "msg": problem}), ret_info])

def process(self, tup):
    '''We serialize the input and output by json for convenience.'''
    try:
        data = array(json.loads(tup.values[1]))
        result = self.model.predict(data)
        storm.emit([tup.values[0], json.dumps(result.tolist())])
    except:
        traceback.print_exc(file=open('/tmp/trace_svm_bolt.txt', 'a'))

def process(self, tup):
    word = tup.values[0]
    count = self.words.get(word)
    if count is None:
        count = 0
    count = count + 1
    self.words[word] = count
    storm.emit([word, str(count)])

def fail(self, msg_id):
    tup, retries = self.buffer[msg_id]
    if retries >= 5:
        del self.buffer[msg_id]
        log('[RabbitMQSpout] Message %s failed for good.' % msg_id)
    else:
        self.buffer[msg_id] = (tup, retries + 1)
        emit(tup, id=msg_id)

def fail(self, cnt_id):
    tup, retries = self.buffer[cnt_id]
    if retries >= 5:
        del self.buffer[cnt_id]
        log('[ZonAPISpout] Message %s failed for good.' % cnt_id)
    else:
        self.buffer[cnt_id] = (tup, retries + 1)
        emit(tup, id=cnt_id)

def process(self, tup):
    # TODO: Add check for empty values
    if tup.values[0]:
        # a try-except here would cause Storm workers to die
        words = tup.values[0].split(" ")
        if words:
            for word in words:
                storm.emit([word])

def process(self, tup):
    print "SplitSentenceBolt.process:tup " + str(tup)
    print "SplitSentenceBolt.process:tup.values[0] " + str(tup.values[0])
    words = tup.values[0].split(" ")
    print "SplitSentenceBolt.process:words " + str(words)
    for word in words:
        storm.emit([word])
        print "SplitSentenceBolt.process:emit " + str(word)

def nextTuple(self):
    time.sleep(0.2)
    # TODO
    # Task: randomly generate a sentence from the sentences string array
    # Note: only generate one sentence in this function
    sentence = random.choice(SENTENCES)
    storm.logInfo("Emitting %s" % sentence)
    storm.emit([sentence])

def nextTuple(self):
    try:
        line = self.f.readline()
        storm.emit([line], id=self.tuple_id)
        self.tuple_id += 1
    except EOFError:
        exit()

def process(self, tuple):
    tweetid = tuple.values[0]
    tweet_date = tuple.values[1]
    tweettext = tuple.values[2]
    country = tuple.values[3]
    hashtags = tuple.values[4]
    tweet_text = tuple.values[5]
    score = afinn.score(tweettext)
    storm.emit([tweetid, tweet_date, score, country, hashtags, tweet_text])

def process(self, tuple):
    id = tuple.values[0]
    url = tuple.values[1]
    # open("/home/roy/output.txt", "a").write(url + '\n')
    if url in TWEETERS_DB:
        tweeters = TWEETERS_DB[url]
        # open("/home/roy/output.txt", "a").write(str(tweeters) + '\n')
        for tweeter in tweeters:
            storm.emit([id, tweeter])

def process(self, tup):
    # Get the word from the inbound tuple
    word = tup.values[0]
    # Increment the counter
    self._counter[word] += 1
    count = self._counter[word]
    storm.logInfo("Emitting %s:%s" % (word, count))
    # Emit the word and count
    storm.emit([word, count])

def nextTuple(self):
    if self.count < 2:
        word = choice(words)
        id = str(uuid4())
        self.pending[id] = word
        storm.rpcMetrics("my-custom-shellspout-metric", 1)
        self.count = self.count + 1
        storm.log("TesterSpout update my-custom-shellspout-metric " + str(self.count))
        storm.emit([word], id=id)

def process(self, tuple):
    id = tuple.values[0]
    tweeter = tuple.values[1]
    # open("/home/roy/output.txt", "a").write(str(tuple) + '\n')
    if tweeter in FOLLOWERS_DB:
        followers = FOLLOWERS_DB[tweeter]
        # open("/home/roy/output.txt", "a").write(str(followers) + '\n')
        for follower in followers:
            storm.emit([id, follower])

def process(self, tuple):
    word = tuple.values[0]
    if word not in self.counts:
        count = 0
    else:
        count = self.counts[word]
    count += 1
    self.counts[word] = count
    storm.emit([word, count])

def write(self, output):
    result = output if isinstance(output, list) else [output]
    try:
        storm.emit(result, stream=self.streamname)
        storm.log("Dispel4Py ------> %s: Emitted to stream %s." % (self.scriptname, self.streamname))
    except TypeError:
        # encode manually
        encoded = encode_types(result)
        storm.emit(encoded, stream=self.streamname)
        storm.log("Dispel4Py ------> %s: Emitted to stream %s." % (self.scriptname, self.streamname))

def process(self, tup):
    word = tup.values[0]
    if word in self.counts:
        count = self.counts[word]
    else:
        count = 0
    count += 1
    self.counts[word] = count
    storm.log(str(word) + " " + str(count))
    storm.emit([word, count])

def process(self, tuple):
    place = tuple.values[0]
    try:
        trends = get_tweets.get_trending_topics_text(self._twitter_api, place)
    except tweepy.TweepError as te:
        # We have hit the Twitter REST API rate limit
        # (https://dev.twitter.com/docs/rate-limiting/1.1); no more tweets for a while
        log_tweeter_error(te, sleep_time=self._rate_limit_sleep_time)
        return
    for trend in trends:
        storm.emit([place, trend['name'], trend['query']])