def nextTuple(self):
    # Pause for a while (acts as a status flag)
    time.sleep(15)
    batch = 10
    bases = ts.get_stock_basics()
    code_list = bases.index
    total = len(code_list)
    batch_size = total // batch
    pool = multiprocessing.Pool(processes=batch)
    results = []
    for i in range(batch + 1):
        begin_index = i * batch_size
        end_index = min((i + 1) * batch_size, total)
        batch_data = code_list.tolist()[begin_index:end_index]
        res = pool.apply_async(ts.get_realtime_quotes, (batch_data, ))
        results.append(res)
        # get_stock_hist_data_batch(code_list=batch_data, start=start, end=end, sh_df=sh_df, sz_df=sz_df, cyb_df=cyb_df, table_name=table_name)
    pool.close()
    pool.join()  # wait for all workers to finish
    for item in results:
        # apply_async returns an AsyncResult; .get() yields the DataFrame
        for i, row in item.get().iterrows():
            code = row['code']
            sentence = random.choice(SENTENCES)
            storm.logInfo("Emitting %s" % sentence)
            storm.logInfo("Emitting code:%s row:%s" % (code, row))
            storm.emit([code, row])
def process(self, tup): # Split the inbound sentence at spaces words = tup.values[0].split(" ") # Loop over words and emit for word in words: storm.logInfo("Emitting %s" % word) storm.emit([word])
def process(self, tup):
    race_time = tup.values[1]
    speed_data = [tup.values[i + 2] for i in range(10)]
    # anomaly scores follow the 10 speed fields, so they start at index 12
    anomaly_score = [tup.values[i + 12] for i in range(10)]
    r = requests.post("http://127.0.0.1:5000",
                      data=json.dumps({'race_time': race_time,
                                       'speed_data': speed_data,
                                       'anomaly_score': anomaly_score}))
    storm.logInfo("SendWebServer Bolt data: race_time: %s, speed_data: %s, anomaly_score: %s"
                  % (str(race_time), str(speed_data), str(anomaly_score)))
    storm.logInfo("SendWebServer Bolt r: %s" % str(r))
def process(self, tup):
    # TODO:
    # Task: keep track of the top N words
    word = tup.values[0]
    count = int(tup.values[1])
    new_word_count = WordCountTuple(word, count)
    if word in self._top_N_map:
        if count > self._top_N_map[word].count:
            self._top_N_map[word].count = count
            heapq.heapify(self._top_N_heap)
            storm.logInfo("Update word: %s, count: %d" % (word, count))
    elif len(self._top_N_heap) < self._N:
        self._top_N_map[word] = new_word_count
        heapq.heappush(self._top_N_heap, new_word_count)
        storm.logInfo("Add word: %s, count: %d" % (word, count))
    else:
        smallest_word_count = self._top_N_heap[0]
        storm.logInfo("Current smallest word: %s, count: %d"
                      % (smallest_word_count.word, smallest_word_count.count))
        if count > smallest_word_count.count:
            del self._top_N_map[smallest_word_count.word]
            self._top_N_map[word] = new_word_count
            heapq.heapreplace(self._top_N_heap, new_word_count)
            storm.logInfo("Add word: %s, count: %d" % (word, count))
    storm.logInfo("Top N: %s" % self.report())
    storm.emit(["top-N", self.report()])
def initialize(self, conf, context): self._conf = conf self._context = context storm.logInfo("Inference bolt instance starting...") self.overall_rank = [] self.last_laptime = [] self.track_status = [] self.pit_stop_count = [] self.completed_laps = [] self.elapsed_time = [] self.best_laptime = [] self.time_behind_leader = [] self.time_behind_prec = [] self.overall_best_laptime = [] self.last_pitted_lap = [] self.start_position = [] self.laps_led = [] self.best_lap = [] self.laps_behind_leader = [] self.laps_behind_prec = [] self.time_step = 10 ############################################################################################# ## change 'sakkas' with your username self.model = tf.keras.models.load_model( '~/Storm/lap_time.prediction.h5')
def process(self, tup):
    '''
    TODO:
    Task: keep track of the top N words
    Hint: implement an efficient algorithm so the bolt is not shut down
          before the task finishes; the reference auto-grader maintains
          an N-size min-heap
    '''
    word = tup.values[0]
    count = float(tup.values[1])
    new_word_count = WordCount(word, count)
    if word in self._top_N_map:
        if count > self._top_N_map[word].count:
            self._top_N_map[word].count = count
            heapq.heapify(self._top_N_heap)
    # add new elements while the heap holds fewer than N entries
    elif len(self._top_N_heap) < self._N:
        self._top_N_map[word] = new_word_count
        heapq.heappush(self._top_N_heap, new_word_count)
    # otherwise replace the smallest entry if the new count is larger
    else:
        smallest_word_count = self._top_N_heap[0]
        if count > smallest_word_count.count:
            del self._top_N_map[smallest_word_count.word]
            self._top_N_map[word] = new_word_count
            heapq.heapreplace(self._top_N_heap, new_word_count)
            storm.logInfo("Add word: %s, count: %d" % (word, count))
    storm.emit(["top-N", self.printvalues()])
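Both Top-N bolts above push their records onto a heapq min-heap, so the record type (WordCountTuple in one snippet, WordCount in the other) must be orderable by count. Neither class appears in this collection; a minimal sketch inferred from usage:

class WordCount(object):
    """Word/count record; heapq keeps the entry with the smallest count on top."""

    def __init__(self, word, count):
        self.word = word
        self.count = count

    def __lt__(self, other):
        # order by count so the heap root is always the current smallest
        return self.count < other.count

The report()/printvalues() helpers that serialize the heap for emission are likewise assumed to exist elsewhere in those bolts.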
def process(self, tup):
    race_time = tup.values[1]
    row_data = [tup.values[i + 2] for i in range(10)]
    storm.logInfo("Inference Bolt data: race_time: %s, data: %s"
                  % (str(race_time), str(row_data)))
    # restart the event
    if race_time == 0:
        self.speed_data = []
        self.next_step_data = row_data
    # we need at least 80 seconds of data to start detection
    elif race_time <= self.time_step:
        self.speed_data.append(self.next_step_data)
        self.next_step_data = row_data
    # normal case: slide the window forward by one step
    else:
        self.speed_data.append(self.next_step_data)
        self.speed_data.pop(0)  # drop the oldest row, not the one just appended
        self.next_step_data = row_data
        input_data = np.expand_dims(
            self.scaler.transform(np.array(self.speed_data).T), 2)
        prediction = self.scaler.inverse_transform(
            self.model.predict(input_data))
        anomaly_score = np.abs(prediction[:, 0]
                               - np.array(self.next_step_data)).tolist()
        # storm.logInfo("race_time:%s speed:%s anomaly_score %s" % (str(race_time), str(self.next_step_data), str(anomaly_score)))
        emit_data = ["word"]
        emit_data.append(race_time)
        emit_data = emit_data + self.next_step_data + anomaly_score
        storm.logInfo("Inference Bolt emitting: %s" % str(emit_data))
        storm.emit(emit_data)
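The inference bolt above reads self.model, self.scaler, self.time_step, and self.speed_data, none of which are set in the lap-time initialize() shown earlier in this collection. A hedged sketch of an initialize() that would satisfy it; the file paths and the pickled scaler are assumptions, not from the source:

import os
import joblib
import tensorflow as tf

def initialize(self, conf, context):
    self._conf = conf
    self._context = context
    self.time_step = 80          # "at least 80 seconds of data" per the spout comments
    self.speed_data = []         # sliding window of past speed rows
    self.next_step_data = []
    # hypothetical artifact paths; neither Keras nor joblib expands '~' itself
    self.scaler = joblib.load(os.path.expanduser('~/Storm/speed_scaler.pkl'))
    self.model = tf.keras.models.load_model(os.path.expanduser('~/Storm/anomaly.h5'))
    storm.logInfo("Anomaly inference bolt instance starting...")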
def nextTuple(self):
    race_time = self.myindex  # one record per second
    # Sleep to simulate the event; this runs 3x faster than the real race.
    # We need at least 80 seconds of data to start anomaly detection, so
    # don't throttle the first 80 seconds.
    if race_time > 80:
        time.sleep(0.35)
    row_data = self.data.loc[self.myindex].values
    # We can only emit a flat list; a list inside a list is not allowed.
    # Example emit_data: ['word', 9, 74.08, 75.02, 73.76, 77.67, 81.24, 74.63, 76.59, 74.61, 72.88, 71.91]
    # The text 'word' does not matter. With multiple bolt instances, tuples
    # are partitioned across them based on this text, e.g. 'apple' to
    # bolt_instance1, 'banana' to bolt_instance2. Keeping it constant sends
    # everything to a single bolt; otherwise data order would have to be handled.
    emit_data = ["word"]
    emit_data.append(race_time)
    emit_data = emit_data + row_data.tolist()
    self.myindex += 1
    # start from the beginning if the race ends
    if self.myindex == len(self.data):
        self.myindex = 0
    storm.logInfo("Emitting %s" % str(emit_data))
    storm.emit(emit_data)
def process(self, tup): # Split the inbound sentence at spaces words = tup.values[0].split(" ") # Loop over words and emit for word in words: storm.logInfo("Emitting-----> %s" % word) storm.emit([word])
def nextTuple(self):
    time.sleep(0.2)
    # TODO
    # Task: randomly generate a sentence from the sentences string array
    sentence = random.choice(SENTENCES)
    storm.logInfo("Emitting %s" % sentence)
    storm.emit([sentence])
def process(self, tuple): id_tweet, text = tuple.values storm.logInfo("LT3BOLTINFO") storm.logInfo(text) json = get_res(text.encode('utf-8')) ''.join(json) json = json.split('\n')[-2] json_string = json.replace("'", '"') data = simplejson.loads(json_string) data['id'] = str(id_tweet) data['source'] = "LT3" data['info'] = text if (data['relevance_boolean'] == 1 and data['severity_boolean'] == 1): data['flag'] = "LT3" else: data['flag'] = "none" del data['relevance_boolean'] del data['severity_boolean'] json_string = simplejson.dumps(data) storm.emit([json_string])
def initialize(self, conf, context): self._conf = conf self._context = context storm.logInfo("Counter bolt instance starting...") self._top_words = Counter() self._N = 10 self._top_N_map = {} self._top_N_heap = []
def nextTuple(self):
    time.sleep(0.2)
    # TODO
    # Task: randomly generate a sentence from the sentences string array
    # Note: only generate one sentence in this function
    sentence = random.choice(SENTENCES)
    storm.logInfo("Emitting %s" % sentence)
    storm.emit([sentence])
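The random-sentence spouts in this collection assume a module-level SENTENCES list; the contents below are illustrative only (the classic Storm word-count sample sentences):

SENTENCES = [
    "the cow jumped over the moon",
    "an apple a day keeps the doctor away",
    "four score and seven years ago",
    "snow white and the seven dwarfs",
    "i am at two with nature",
]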
def process(self, tup):
    # Split the inbound sentence on non-alphanumeric characters
    words = re.split('[^a-zA-Z0-9-]', tup.values[0])
    # words = tup.values[0].split()
    # Loop over words and emit the non-empty ones
    for word in words:
        if word:
            storm.logInfo("Emitting %s" % word)
            storm.emit([word])
def process(self, tup): # Get the word from the inbound tuple word = tup.values[0] # Increment the counter self._counter[word] +=1 count = self._counter[word] storm.logInfo("Emitting &&&&&&&&&&&& %s:%s" % (word, count)) # Emit the word and count storm.emit([word, count])
def process(self, tup): # Get the word from the inbound tuple word = tup.values[0] # Increment the counter self._counter[word] +=1 count = self._counter[word] storm.logInfo("Emitting %s:%s" % (word, count)) # Emit the word and count storm.emit([word, count])
def initialize(self, conf, context): self._conf = conf self._context = context storm.logInfo("Counter bolt instance starting...") # TODO: # Task: set N pass
def process(self, tup): # TODO: # Task 1: make the words all lower case storm.logInfo("Received tuple %s" % tup) line = tup.values[0] line = line.lower() # Task 2: remove the common words if line not in self._common_words and line != " ": storm.logInfo("Normalized word %s" % line) storm.emit([line])
def initialize(self, conf, context):
    self._conf = conf
    self._context = context
    self._topic = 'temptopic_words'
    self._producer = KafkaProducer(
        bootstrap_servers=[
            '10.78.68.45:9092', '10.78.68.46:9092', '10.78.68.47:9092'
        ],
        value_serializer=lambda m: json.dumps(m).encode('utf-8'))
    storm.logInfo("Split bolt instance starting...")
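The producer above is only initialized here. A hedged sketch of the matching process() that would publish each split word to the topic; the {"data": [...]} message shape mirrors what the Kafka consumer spout later in this collection reads, but it remains an assumption:

def process(self, tup):
    # split the inbound sentence and publish the words to Kafka
    words = tup.values[0].split(" ")
    self._producer.send(self._topic, {"data": words})
    for word in words:
        storm.logInfo("Emitting %s" % word)
        storm.emit([word])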
def initialize(self, conf, context): self._conf = conf self._context = context self._complete = False storm.logInfo("Spout instance starting...") # TODO: # Task: Initialize the file reader pass
def process(self, tup):
    # TODO
    # Task: word count
    # Hint: use an instance variable to track the word count
    word = tup.values[0]
    self._counter[word] += 1
    count = self._counter[word]
    storm.logInfo("Emitting %s:%s" % (word, count))
    storm.emit([word, count])
def nextTuple(self):
    self._index += 1
    for msg in self._consumer:
        words = msg.value["data"]
        for word in words:
            tuple_id = str(uuid.uuid4())
            record = {"data": [word], "fail_count": 0}
            self._tuples[tuple_id] = record
            storm.logInfo("index %d - emitting: %s" % (self._index, word))
            storm.emit([word], id=tuple_id)
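The spout above registers every emitted tuple in self._tuples with a fail_count, which only pays off with ack/fail handlers. A minimal sketch of what they might look like; the retry limit of 3 is an assumption:

def ack(self, tuple_id):
    # tuple fully processed downstream; forget its replay record
    self._tuples.pop(tuple_id, None)

def fail(self, tuple_id):
    record = self._tuples.get(tuple_id)
    if record is None:
        return
    record["fail_count"] += 1
    if record["fail_count"] <= 3:  # retry limit assumed
        storm.emit(record["data"], id=tuple_id)  # re-emit with the same id
    else:
        del self._tuples[tuple_id]  # give up after repeated failures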
def process(self, tup):
    # TODO
    # Task: split sentence and emit words
    # Hint: split on "[^a-zA-Z0-9-]"
    storm.logInfo("The tuple has format %s" % tup)
    line = tup.values[0]
    line = re.sub(r"[^a-zA-Z0-9-]", ' ', line)
    line = line.split()
    for word in line:
        storm.logInfo("Emitting %s" % word)
        storm.emit([word])
def init_model_when_necessary(self, station, query_type):
    # has_key() is Python 2 only; use the `in` operator instead
    if station not in self.dictionary:
        self.dictionary[station] = {}
        storm.logInfo("station[" + station + "] is initialized!")
    if query_type not in self.dictionary[station]:
        self.dictionary[station][query_type] = \
            self.model_creation_mapping_table[query_type](station)
        storm.logInfo("station[" + station + "," + str(query_type) + "] is initialized!")
    # TODO: this function should report whether the model initialization failed.
    return 1
def process(self, tup):
    # TODO:
    # Task 1: make the words all lower case
    # Task 2: remove the common words
    word = tup.values[0]
    if word:
        word = word.lower()
        if word not in self._common_words:
            storm.logInfo("Normalize %s" % word)
            storm.emit([word])
def initialize(self, conf, context): self._conf = conf self._context = context self._complete = False storm.logInfo("Spout instance starting...") # TODO: # Task: Initialize the file reader self._path = conf['input'] self._file_reader = open(self._path, 'r')
def process(self, tup):
    # Split the inbound sentence at spaces
    # process tuples here
    # data = json.loads(tup)
    # words = tup.values[0].split()
    # # Loop over words and emit
    # for word in words:
    storm.logInfo("Emitting from DB BOLT")
    # storm.emit([word])
    storm.emit([tup])
def process(self, tup):
    # Process block data here
    data = tup.values[0]
    # the payload is JSON-encoded twice, so decode it twice
    data = json.loads(data)
    data = json.loads(data)
    blockData = data["result"]
    transactions = blockData["transactions"]
    storm.logInfo("Emitting from Transaction BOLT")
    # storm.emit([word])
    storm.emit([transactions])
def nextTuple(self):
    # TODO:
    # Task 1: read the next line and emit a tuple for it
    # Task 2: don't forget to sleep for 1 second when the file is
    # entirely read to prevent a busy-loop
    line = self._f.readline()
    if line:
        storm.logInfo("Emitting %s" % line)
        storm.emit([line])
    else:
        sleep(1)
def initialize(self, conf, context): self._conf = conf self._context = context storm.logInfo("Top-N bolt instance starting...") # TODO: # Task: set N self._nvalue = conf['topValue'] # End self._countmap = {} self._max = 0 self._min = 0
def getTimeValue(timestamp, entity, database):
    doc = database[entity].find_one({'Time': timestamp})
    doc = json.dumps(doc, sort_keys=True, indent=4, default=json_util.default)
    data = json.loads(doc)
    if data is not None:
        storm.logInfo(
            "-------------------------------------------------------------------> %s"
            % data["Value"])
        return data["Value"]
    else:
        storm.logInfo("-------> 0" + str(timestamp))
        return 0
def initialize(self, conf, context):
    self._conf = conf
    self._context = context
    self._common_words = [
        "the", "be", "a", "an", "and", "of", "to", "in", "am", "is", "are",
        "at", "not", "that", "have", "i", "it", "for", "on", "with", "he",
        "she", "as", "you", "do", "this", "but", "his", "by", "from", "they",
        "we", "her", "or", "will", "my", "one", "all", "s", "if", "any",
        "our", "may", "your", "these", "d", " ", "me", "so", "what", "him",
        "their"
    ]
    storm.logInfo("Normalizer bolt instance starting...")
def initialize(self, conf, context): self._conf = conf self._context = context self._complete = False storm.logInfo("Spout instance starting...") #self._myreaderfile = self._conf[input.file] self._myreadfilepointer = open("/tmp/data.txt") #storm.logInfo("%s",self._myreaderfile) #storm.logInfo("see this f****d up thing") # TODO: # Task: Initialize the file reader pass
def process(self, tuple): tweet_id = tuple.values["tweet-id"] sentiment = tuple.values["sentiment"] country = tuple.values["country"] hashtags = tuple.values["hashtags"] storm.logInfo("Received tweet with tweet id: "+str(tweet_id)) for h in hashtags: try: db_obj = Hashtag.get(hashtag=h) db_obj_count = db_obj.count db_obj_overall_sentiment = (((db_obj.overall_sentiment * db_obj_count) + sentiment) / (db_obj.count + 1)) db_obj_country_sentiment = db_obj.country_sentiment["country"] db_obj_country_sentiment = (((db_obj_country_sentiment["sentiment"] * db_obj_country_sentiment["count"]) + sentiment) / (db_obj.country_sentiment["count"] + 1)) db_obj_country_sentiment["count"] += 1 db_obj.count += 1 db_obj.overall_sentiment = db_obj_overall_sentiment db_obj.country_sentiment = db_obj_country_sentiment db_obj.save() storm.logInfo("Updating Hashtag: "+h) except: db_obj = Hashtag(hashtag=h, overall_sentiment=sentiment, country_sentiment={country: { "sentiment": sentiment, "count": 1}}, count=1) db_obj.save() storm.logInfo("Inserting New Hashtag: "+h)
def initialize(self, conf, context):
    self._conf = conf
    self._context = context
    # Create a new counter for this instance
    self._counter = Counter()
    storm.logInfo("Counter bolt instance starting...")
def initialize(self, conf, context): self._conf = conf self._context = context storm.logInfo("Spout instance starting...")
def nextTuple(self):
    # Emit a random sentence
    sentence = random.choice(SENTENCES)
    storm.logInfo("Emitting %s" % sentence)
    storm.emit([sentence])