class ParserWorkerThread(threading.Thread):
    """
    Instances of this class examine messages and fill in the title for any
    urls in the message.

    Messages are taken from ``in_q``; a message whose serialized details
    changed as a result of the lookups is put on ``out_q``.
    """

    def __init__(self, thread_id, in_q, out_q, timeout=1):
        """
        :param thread_id: number used to build the thread (and logger) name
        :param in_q: queue the worker takes messages from
        :param out_q: queue that receives messages whose details changed
        :param timeout: seconds to block on ``in_q`` before re-checking the
            stop flag
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.name = "Worker %d" % thread_id
        self._logger = logging.getLogger(self.name)
        self._in_q = in_q
        self._out_q = out_q
        self._timeout = timeout
        self._stopped = threading.Event()
        # Use a parser to do this lookup
        self._parser = HipChatParser()

    def run(self):
        """
        Process queued messages until the stop flag is set.

        A failure while processing one message is logged and does not end
        the loop; every item taken from the queue is marked done exactly
        once, whether or not processing succeeded.
        """
        self._logger.debug('Worker starting')
        while not self._stopped.is_set():
            try:
                item = self._in_q.get(True, self._timeout)
            except Queue.Empty:
                # After sufficient time with no items being on the queue,
                # we should probably reclaim this thread
                continue
            try:
                self._worker_process(item)
            except Exception:
                # Top-level boundary of the thread: one bad message must not
                # take the worker down with it.
                self._logger.exception('Failed to process: %s', item)
            finally:
                # Always balance the get(), otherwise in_q.join() would
                # block forever after a failed item.
                self._in_q.task_done()
        self._logger.debug('Worker stopping')

    def join(self, timeout=None):
        """
        Stop all processing on this thread.

        Sets the stop flag, then waits for the thread as
        ``threading.Thread.join`` does.
        """
        self._stopped.set()
        super(ParserWorkerThread, self).join(timeout)

    def _worker_process(self, msg):
        """
        Do the actual work of looking up titles of urls in the given message.
        If the message changes as a result of those lookups, dispatch an
        updated version of the message.
        """
        self._logger.debug('Processing: %s', msg)
        original_json = msg.details_as_json
        self._lookup_costly_details(msg)
        msg.details_as_json = self._parser.dict_to_json(msg.details)
        # Only dispatch an update if the details have changed
        if original_json != msg.details_as_json:
            self._out_q.put(msg)

    def _lookup_costly_details(self, msg):
        """
        Fill in whatever details we can about the message.

        Sets the title of every link entry in ``msg.details``; a message
        without a links entry is left untouched.
        """
        for link in msg.details.get(HipChatParser.DETAIL_LINKS, ()):
            link[HipChatParser.DETAIL_TITLE] = self._parser.fetch_title(
                link[HipChatParser.DETAIL_URL])
def test_Parse_Mentions_Single_AtEnd(self):
    # A single @mention at the very end of the message is reported.
    parser = HipChatParser()
    message = 'you around? @chris'
    expected_json = (
        '{\n'
        ' "mentions": [\n'
        ' "chris"\n'
        ' ]\n'
        '}'
    )
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
def test_Parse_Emoticons_Multiple(self):
    # Two emoticons in one message are both reported, in message order.
    parser = HipChatParser()
    message = 'Good morning! (megusta) (coffee)'
    expected_json = (
        '{\n'
        ' "emoticons": [\n'
        ' "megusta", \n'
        ' "coffee"\n'
        ' ]\n'
        '}'
    )
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
def test_Parse_Links_Single(self):
    # A single link is reported together with the title taken from the
    # page content supplied by the fake fetcher.
    canned_pages = {
        "http://www.nbcolympics.com":
            "<title>NBC Olympics | 2014 NBC Olympics in Sochi Russia</title>",
    }
    fetcher = FakeUrlFetcher(canned_pages)
    parser = HipChatParser(url_fetcher=fetcher)
    message = 'Olympics are starting soon; http://www.nbcolympics.com'
    expected_json = (
        '{\n'
        ' "links": [\n'
        ' {\n'
        ' "title": "NBC Olympics | 2014 NBC Olympics in Sochi Russia", \n'
        ' "url": "http://www.nbcolympics.com"\n'
        ' }\n'
        ' ]\n'
        '}'
    )
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
def __init__(self, number_workers=5):
    # Worker bookkeeping: how many workers to create, and the (initially
    # empty) list that holds them.
    self._number_workers = number_workers
    self._threads = []

    # Two queues: a private one for messages handed to the workers and a
    # public one for results.
    self._worker_q = Queue.Queue()
    self.out_q = Queue.Queue()

    # The "fast" parser is an ordinary parser with an injected url fetcher
    # that, per the original note, returns an empty string.
    null_fetcher = NullUrlFetcher()
    self._fastParser = HipChatParser(null_fetcher)
def __init__(self, thread_id, in_q, out_q, timeout=1):
    # Base-class setup must come first; daemon and name are set right after.
    threading.Thread.__init__(self)
    worker_name = "Worker %d" % thread_id
    self.daemon = True
    self.name = worker_name
    # The logger shares the thread's name.
    self._logger = logging.getLogger(worker_name)

    # Input/output queues and the timeout value.
    self._in_q, self._out_q = in_q, out_q
    self._timeout = timeout

    # Stop flag, initially clear.
    self._stopped = threading.Event()

    # Each worker gets its own parser for the lookups.
    self._parser = HipChatParser()
def test_Parse_Everything(self):
    # Mentions, an emoticon and a link (with a title containing double
    # quotes) in a single message.
    #
    # The page content is written with single-quoted literals so the double
    # quotes around the tweet text are plain characters; nesting them
    # unescaped inside a double-quoted literal is a syntax error.
    # NOTE(review): if the fixture originally used &quot; entities here,
    # restore them - the expected JSON below only shows that the resulting
    # title contains literal double quotes.
    fake_url_fetcher = FakeUrlFetcher({
        "https://twitter.com/jdorfman/status/430511497475670016":
            '<title>Justin Dorfman on Twitter: "nice @littlebigdetail from @HipChat (shows hex '
            'colors when pasted in chat). http://t.co/7cI6Gjy5pq"</title>'})
    p = HipChatParser(url_fetcher=fake_url_fetcher)
    s = '@bob @john (success) such a cool feature; https://twitter.com/jdorfman/status/430511497475670016'
    t = ('{\n'
         ' "emoticons": [\n'
         ' "success"\n'
         ' ], \n'
         ' "links": [\n'
         ' {\n'
         ' "title": "Justin Dorfman on Twitter: \\\"nice @littlebigdetail from @HipChat (shows hex '
         'colors when pasted in chat). http://t.co/7cI6Gjy5pq\\\"\", \n'
         ' "url": "https://twitter.com/jdorfman/status/430511497475670016"\n'
         ' }\n'
         ' ], \n'
         ' "mentions": [\n'
         ' "bob", \n'
         ' "john"\n'
         ' ]\n'
         '}')
    self.assertMultiLineEqual(p.parse(s), t)
def test2(strings):
    # Measure each parse call individually through the statistician and
    # print its report once every string has been measured.
    hipchat_parser = HipChatParser(url_fetcher=FakeUrlFetcher({}))
    statistician = PerformanceStatistician()
    for text in strings:
        statistician.measure(lambda: hipchat_parser.parse(text))
    report = statistician.report()
    print(report)
def test1(strings):
    # Time 1000 passes over `strings` with timeit and print the total
    # message count alongside the elapsed seconds.
    hipchat_parser = HipChatParser(url_fetcher=FakeUrlFetcher({}))
    iterations = 1000

    def parse_all():
        # Stops at the first string whose parse result is None.
        for text in strings:
            if hipchat_parser.parse(text) is None:
                return False
        return True

    total_messages = len(strings) * iterations
    elapsed = timeit.timeit(parse_all, number=iterations)
    print('{} messages: {:f} seconds'.format(total_messages, elapsed))
def test_Parse_Emoticons_TooLongIdentifier_EmptyJsonString(self):
    # A parenthesised word that is too long to be an emoticon yields an
    # empty JSON object.
    parser = HipChatParser()
    message = 'Good morning! (thisIsTooLongToBeAnEmoticon)'
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, '{}')
def test_Parse_Emoticons_UnmatchedBrackets_EmptyJsonString(self):
    # Two words sharing one pair of brackets are not an emoticon; the
    # result is an empty JSON object.
    parser = HipChatParser()
    message = 'Good morning! (megusta coffee)'
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, '{}')
def test_Parse_Mentions_BareAt_EmptyJsonString(self):
    # An '@' with no name attached is not a mention.
    parser = HipChatParser()
    message = 'this string contains a lonely @ sign'
    expected_json = '{}'
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
def test_Parse_StringWithoutAnyMarkup_EmptyJsonString(self):
    # Plain text without mentions, emoticons or links yields an empty
    # JSON object.
    parser = HipChatParser()
    message = 'this string contains no interesting markup'
    expected_json = '{}'
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
def test_Parse_EmptyString_EmptyJsonString(self):
    # The empty message yields an empty JSON object.
    parser = HipChatParser()
    message = ''
    expected_json = '{}'
    actual_json = parser.parse(message)
    self.assertEqual(actual_json, expected_json)
class AsyncParser:
    """
    Create a message parser which decodes details about the provided messages
    and dispatches the resulting augmented messages to an output queue.

    Each message is put on ``out_q`` immediately with the details that can be
    decoded without delay; a message containing links is also handed to the
    worker threads.
    """
    _logger = logging.getLogger('AsyncParser')

    def __init__(self, number_workers=5):
        """
        :param number_workers: how many worker threads ``start()`` creates
        """
        self._worker_q = Queue.Queue()
        self.out_q = Queue.Queue()
        self._number_workers = number_workers
        self._threads = []
        # Make a "fast" parser by simply installing a url fetcher that
        # returns an empty string.
        # (sometimes you just have to love the power of dependency injection :)
        self._fastParser = HipChatParser(NullUrlFetcher())

    def start(self):
        """
        Start the worker threads that pull messages from the worker queue.

        Calling this while workers are already registered is a no-op;
        replacing the list would orphan the running workers so that
        ``stop()`` could never join them.
        """
        if self._threads:
            self._logger.warning('Already started; ignoring start()')
            return
        self._logger.debug('Starting...')
        # In a real app, we would manage these threads more intelligently
        self._threads = [self._create_worker(i)
                         for i in range(self._number_workers)]
        self._logger.info('Started')

    def stop(self):
        """
        Shutdown this processor in an orderly fashion.

        Joins every registered worker and forgets them, so a repeated
        ``stop()`` has nothing left to join and a later ``start()`` creates
        a fresh set of workers.
        """
        self._logger.debug('Stopping...')
        workers, self._threads = self._threads, []
        for t in workers:
            t.join()
        self._logger.info('Stopped')

    def parse(self, msg):
        """
        Parses the given message and send the result to the output queue.

        Sets ``msg.details`` and ``msg.details_as_json`` from ``msg.text``
        using the fast parser.
        """
        self._logger.debug('Parsing: %s', msg)
        # Quickly decode the details that we can do without delay
        msg.details = self._fastParser.parse_to_dict(msg.text)
        msg.details_as_json = self._fastParser.dict_to_json(msg.details)
        # Pumps out the message. "slow" details are not yet filled in
        self.out_q.put(msg)
        # If the message had links, send it to the workers, which will
        # produce an updated message once the details are filled in
        if HipChatParser.DETAIL_LINKS in msg.details:
            self._worker_q.put(msg)

    def _create_worker(self, worker_id):
        """
        Create and start a worker that will collect more costly message
        details. Returns the started worker.
        """
        w = ParserWorkerThread(worker_id, self._worker_q, self.out_q)
        w.start()
        return w