def process(data):
    """Run MITIE NER over every sentence in *data* and publish downstream.

    For each sentence in ``data['sents']`` the text is tokenized and run
    through the NER extractor; the per-sentence entities are stored in
    ``data['ner_info'][sid]`` as ``((start, stop), tag, score)`` tuples.
    The payload is always published to the PUBLISH queue, even if some
    sentences failed.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    data['ner_info'] = {}
    for sid, sent in data['sents'].iteritems():
        try:
            tokens = tokenize(sent['text'])
            entities = NER.extract_entities(tokens)
            new_ents = []
            for e in entities:
                # MITIE returns xrange iters. Convert to tuples of ints.
                # xrange.__reduce__() yields (xrange, (start, stop[, step])),
                # so call it once and take the first two bounds.
                bounds = e[0].__reduce__()[1]
                r = (bounds[0], bounds[1])
                tag = e[1]
                score = e[2]
                new_ents.append((r, tag, score))
            data['sents'][sid]['tokens'] = tokens
            data['ner_info'][sid] = new_ents
        except Exception as e:
            # Best-effort per sentence: log with traceback (logger.info(e)
            # dropped it) and continue with the remaining sentences.
            logger.exception(e)
    logger.info('Finished processing content.')
    rabbit_publish.send(data, PUBLISH)
def process(data, tfidf, clf):
    """Predict story relevancy from its title and publish downstream.

    Vectorizes ``data['title']`` with *tfidf*, classifies it with *clf*,
    and stores the result in ``data['predicted_relevancy']``. On failure
    the error is logged and the payload is published without a prediction
    (deliberate best-effort behavior).
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    try:
        mat = tfidf.transform([data['title']])
        pred = clf.predict(mat)
        data['predicted_relevancy'] = pred[0]
        logger.info('Finished processing content.')
    except Exception as e:
        # logger.exception keeps the traceback that logger.info(e) discarded.
        logger.exception(e)
    rabbit_publish.send(data, PUBLISH)
def post(self):
    """Accept posted content, key it, and send it down the pipeline.

    Derives a deterministic pipeline key from the SHA1 of the joined
    sentences, attaches it to the payload, publishes to the PUBLISH
    queue, and returns the key to the caller.
    """
    args = self.reqparse.parse_args()
    rabbit = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    logger.info('Received data...')
    data = utils.prep_data(args['data'])
    # NOTE(review): assumes data['sents'] joins to a str — confirm shape
    # against utils.prep_data.
    key = hashlib.sha1(''.join(data['sents'])).hexdigest()
    data['pipeline_key'] = key
    logger.info('Sending downstream with key {}...'.format(key))
    rabbit.send(data, PUBLISH)
    # Was logging.info (root logger) — use the module logger like every
    # other statement in this file.
    logger.info('Sent {}'.format(key))
    return key
def post(self):
    """Accept posted content, tag it with a UUID pipeline key, and publish.

    Prepares the incoming payload, attaches a random ``pipeline_key``,
    sends it to the PUBLISH queue, and returns the key to the caller.
    """
    args = self.reqparse.parse_args()
    rabbit = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    logger.info('Received data...')
    data = utils.prep_data(args['data'])
    pipeline_key = str(uuid.uuid4())
    data['pipeline_key'] = pipeline_key
    logger.info('Sending to the downstream with key {}...'.format(pipeline_key))
    rabbit.send(data, PUBLISH)
    # Was logging.info (root logger) — use the module logger consistently.
    logger.info('Sent {}'.format(pipeline_key))
    return pipeline_key
def process(data):
    """Run predicate parsing over each sentence and publish downstream.

    Feeds every sentence in ``data['sents']`` through ParseyPredFace and
    stores the output per sentence id in ``data['predicate_info']``. The
    payload is always published, even if some sentences failed.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    data['predicate_info'] = {}
    for sid, sent in data['sents'].iteritems():
        try:
            output = ParseyPredFace.parse(sent['text'].encode('utf-8'))
            data['predicate_info'][sid] = output
        except Exception as e:
            # Best-effort per sentence: keep the traceback (logger.info(e)
            # dropped it) and continue with the remaining sentences.
            logger.exception(e)
    logger.info('Finished processing content.')
    rabbit_publish.send(data, PUBLISH)
def extract(message):
    """Code events for each sentence in *message* and publish the story.

    For every entry in ``story['event_info']`` the sentence text is sent
    to CoreNLP and then to PETRARCH; truthy events from PETR's JSON
    response are appended under the entry's ``coded`` key. Failures in
    either service are logged and the story is published regardless.
    """
    rabbit_publish = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    story = message
    keys = story['event_info'].keys()
    for val in keys:
        logger.info('Processing {}'.format(val))
        text = story['event_info'][val]['sent']['text']
        text = text.encode('utf-8')
        event_dict = send_to_corenlp(story, text)
        try:
            events_r = send_to_petr(event_dict)
        except Exception as e:
            logger.info('There was an exception with PETR. {}\n'.format(e))
            # Sentinel instead of {}: the original's dict had no .json(),
            # so the formatting step (and the logging inside its handler)
            # raised AttributeError and crashed before publishing.
            events_r = None
        try:
            story['event_info'][val]['coded'] = []
            if events_r is not None:
                event_updated = events_r.json()
                for e in event_updated:
                    # Drop falsy entries (None / empty) from PETR output.
                    if e:
                        story['event_info'][val]['coded'].append(e)
        except Exception:
            # Narrowed from a bare `except:`; logger.exception records the
            # traceback. Do NOT re-call events_r.json() here — that second
            # call is what crashed the original handler.
            logger.exception('Something went wrong in the formatting.')
    rabbit_publish.send(story, PUBLISH)
def main():
    """Block on the CONSUME queue, dispatching each message to callback."""
    consumer = utils.RabbitClient(queue=CONSUME, host='rabbitmq')
    consumer.receive(callback)
def publish(data):
    """Send *data* to the PUBLISH queue on the rabbitmq host."""
    publisher = utils.RabbitClient(queue=PUBLISH, host='rabbitmq')
    publisher.send(data, PUBLISH)