class TestOperations(unittest.TestCase):
    '''Tests for CollectArticles: collection, storage reports, queue, dates.

    Every test gets a Storage bound to a scratch database whose name is the
    configured database name prefixed with "test". That database is dropped
    again in tearDown.
    '''

    def setUp(self):
        '''Build a Storage that points at the "test"-prefixed database.'''
        # Only the last path segment of the URI (the database name) changes.
        base_uri, _, original_db = settings.MONGODB_URI.rpartition("/")
        self.test_db = "test" + original_db
        self.testing_storage = Storage(uri=base_uri + "/" + self.test_db)

    def tearDown(self):
        '''Drop the scratch database named in setUp.'''
        Storage().get_connection().drop_database(self.test_db)

    def _check_collection(self, collector, results):
        '''Assert that results is non-empty and matches the collector report.

        Shared by the collection tests so both verify the same contract:
        every result carries ref_dates, and the report's meta counters agree
        with the number of results.
        '''
        # Single-argument print calls give the same output under Python 2
        # and remain valid syntax under Python 3.
        print("")
        print("results: %s" % len(results))
        assert len(results) > 0
        for result in results:
            assert result.ref_dates, "%s : %s" % (result, result.url)
        # NOTE(review): assumes get_report() is a plain getter, so it is
        # fetched once instead of once per assertion -- confirm.
        report = collector.get_report()
        assert report
        assert report.collector == "brokenpromises.operations.CollectArticles", \
            report.collector
        assert report.meta['count'] == len(results)
        assert report.meta['related_articles'] <= len(results)
        assert len(report.meta['urls_found']) == len(results)

    def test_get_articles(self):
        '''Run a collector on the guardian channel and check its report.'''
        collector = CollectArticles(
            ("brokenpromises.channels.guardian", ), "2014", "1")
        self._check_collection(collector, collector.run())

    def test_get_articles_with_storage(self):
        '''Run a storage-backed collector twice and check the saved reports.'''
        from brokenpromises import Article
        searched_date = (2014, 1, None)
        collector = CollectArticles(
            ("brokenpromises.channels.nytimes", ),
            *searched_date, use_storage=True)
        # replace storage with custom storage (testing db)
        collector.storage = self.testing_storage
        storage = self.testing_storage

        self._check_collection(collector, collector.run())
        done_reports = storage.get_reports(
            name="collector", searched_date=searched_date, status="done")
        # The diagnostic passes searched_date by keyword, like every other
        # get_reports call here, so it cannot land on another parameter.
        assert len(done_reports) == 1, storage.get_reports(
            searched_date=searched_date)

        # Second run: presumably answered from storage, which would explain
        # the single "escaped" report expected below -- verify against
        # CollectArticles.
        results = collector.run()
        assert len(results) > 0, results
        assert isinstance(results[0], Article), type(results[0])
        assert len(storage.get_reports(searched_date=searched_date)) == 2
        assert len(storage.get_reports(
            name="collector", searched_date=searched_date)) == 2
        assert len(storage.get_reports(
            name="collector", searched_date=searched_date,
            status="escaped")) == 1

    def test_get_articles_with_queue(self):
        '''Hand a collector to the worker and expect it to run without error.'''
        # need to explicitly import the runnable object
        from brokenpromises.operations import CollectArticles
        from brokenpromises.worker import worker
        collector = CollectArticles(
            ("brokenpromises.channels.guardian", ), "2014", 1,
            use_storage=False)
        worker.run(collector)

    def test_retrieve_referenced_dates(self):
        '''Check that each sample date string is extracted and parsed.

        The samples are joined with filler text into one document. Every
        sample must come back exactly once with the expected
        (year, month, day) tuple, and nothing else may be extracted.
        '''
        dates = (
            ("10 October 2013", (2013, 10, 10)),
            ("10 october, 2013", (2013, 10, 10)),
            ("4 by October 2013", (2013, 10, 4)),
            ("10 by October 2013", (2013, 10, 10)),
            ("10 by October, 2013", (2013, 10, 10)),
            ("Jan 2014", (2014, 1, None)),
            ("10 in October 2013", (2013, 10, 10)),
            ("10 in October, 2013", (2013, 10, 10)),
            ("10 of October 2013", (2013, 10, 10)),
            ("10 of October, 2013", (2013, 10, 10)),
            ("10th October 2013", (2013, 10, 10)),
            ("10th by October 2013", (2013, 10, 10)),
            ("10th by October, 2013", (2013, 10, 10)),
            ("10th in october 2013", (2013, 10, 10)),
            ("10th in October, 2013", (2013, 10, 10)),
            ("10th of October 2013", (2013, 10, 10)),
            ("10th of October, 2013", (2013, 10, 10)),
            ("2013-10-10", (2013, 10, 10)),
            ("2013/10/10", (2013, 10, 10)),
            ("August, 2013", (2013, 8, None)),
            ("2013", (2013, None, None)),
            ("November 04, 2013", (2013, 11, 4)),
            ("November 4, 2013", (2013, 11, 4)),
        )
        text = " bla bli 123. Bu \n pouet12 \n 12412 ".join(
            [sample[0] for sample in dates])
        refs = CollectArticles.retrieve_referenced_dates(text)
        # Extracted strings not yet matched to a sample; whatever is left at
        # the end is reported as surplus.
        date_found = [found['extracted_date'] for found in refs]
        for expected_text, expected_date in dates:
            # A list comprehension (not filter()[0] under a bare except)
            # works on Python 2 and 3 and lets unrelated errors surface.
            matches = [found for found in refs
                       if found["extracted_date"] == expected_text]
            if not matches:
                raise Exception(
                    "\"%s\" not found in document. Date found:\n%s" % (
                        expected_text, "\n".join(date_found)))
            ref = matches[0]
            assert ref['extracted_date'] in expected_text
            assert ref['date'] == expected_date, "%s != %s" % (
                ref['date'], expected_date)
            date_found.remove(ref['extracted_date'])
        assert len(refs) == len(dates), "%s != %s\nToo much : %s" % (
            len(refs), len(dates), date_found)
class TestOperations(unittest.TestCase):
    '''Test case covering CollectArticles runs and its date extraction.

    setUp derives a scratch database name ("test" + configured name) and
    builds a Storage on it. tearDown drops that database.
    '''

    def setUp(self):
        '''Point self.testing_storage at the "test"-prefixed database.'''
        uri_parts = settings.MONGODB_URI.split("/")
        # The last URI segment is the database name. Everything before it
        # is reused unchanged for the test URI.
        self.test_db = "test" + uri_parts[-1]
        test_uri = "/".join(uri_parts[:-1]) + "/" + self.test_db
        self.testing_storage = Storage(uri=test_uri)

    def tearDown(self):
        '''Drop the database whose name was computed in setUp.'''
        connection = Storage().get_connection()
        connection.drop_database(self.test_db)

    def test_get_articles(self):
        '''Collect from the guardian channel and validate the report.'''
        collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", "1")
        results = collector.run()
        # print("...") with one argument prints the same text on Python 2
        # and is also valid on Python 3.
        print("")
        print("results: %s" % len(results))
        assert len(results) > 0
        for article in results:
            assert article.ref_dates, "%s : %s" % (article, article.url)
        # NOTE(review): get_report() is read once here on the assumption
        # that it simply returns the current report -- confirm.
        report = collector.get_report()
        assert report
        assert report.collector == "brokenpromises.operations.CollectArticles", report.collector
        assert report.meta['count'] == len(results)
        assert report.meta['related_articles'] <= len(results)
        assert len(report.meta['urls_found']) == len(results)

    def test_get_articles_with_storage(self):
        '''Collect twice with storage enabled and check the stored reports.'''
        from brokenpromises import Article
        searched_date = (2014, 1, None)
        collector = CollectArticles(("brokenpromises.channels.nytimes",), *searched_date, use_storage=True)
        # replace storage with custom storage (testing db)
        collector.storage = self.testing_storage
        get_reports = self.testing_storage.get_reports

        results = collector.run()
        print("")
        print("results: %s" % len(results))
        assert len(results) > 0
        for article in results:
            assert article.ref_dates, "%s : %s" % (article, article.url)
        report = collector.get_report()
        assert report
        assert report.collector == "brokenpromises.operations.CollectArticles"
        assert report.meta['count'] == len(results)
        assert report.meta['related_articles'] <= len(results)
        assert len(report.meta['urls_found']) == len(results)
        # searched_date is passed by keyword in the failure message too,
        # matching the other get_reports calls in this test.
        assert len(get_reports(name="collector", searched_date=searched_date, status="done")) == 1, \
            get_reports(searched_date=searched_date)

        # Running again must still yield Article objects. The extra report
        # is expected with status "escaped" (presumably because the first
        # run is already stored -- verify against CollectArticles).
        results = collector.run()
        assert len(results) > 0, results
        assert isinstance(results[0], Article), type(results[0])
        assert len(get_reports(searched_date=searched_date)) == 2
        assert len(get_reports(name="collector", searched_date=searched_date)) == 2
        assert len(get_reports(name="collector", searched_date=searched_date, status="escaped")) == 1

    def test_get_articles_with_queue(self):
        '''Run a collector through the worker; passes if nothing raises.'''
        # need to explicitly import the runnable object
        from brokenpromises.operations import CollectArticles
        from brokenpromises.worker import worker
        collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", 1, use_storage=False)
        worker.run(collector)

    def test_retrieve_referenced_dates(self):
        '''Verify extraction of many date spellings from one document.

        Each (text, (year, month, day)) sample must be found by
        CollectArticles.retrieve_referenced_dates with exactly that text and
        that parsed date. The number of references must equal the number of
        samples.
        '''
        dates = (
            ("10 October 2013", (2013, 10, 10)),
            ("10 october, 2013", (2013, 10, 10)),
            ("4 by October 2013", (2013, 10, 4)),
            ("10 by October 2013", (2013, 10, 10)),
            ("10 by October, 2013", (2013, 10, 10)),
            ("Jan 2014", (2014, 1, None)),
            ("10 in October 2013", (2013, 10, 10)),
            ("10 in October, 2013", (2013, 10, 10)),
            ("10 of October 2013", (2013, 10, 10)),
            ("10 of October, 2013", (2013, 10, 10)),
            ("10th October 2013", (2013, 10, 10)),
            ("10th by October 2013", (2013, 10, 10)),
            ("10th by October, 2013", (2013, 10, 10)),
            ("10th in october 2013", (2013, 10, 10)),
            ("10th in October, 2013", (2013, 10, 10)),
            ("10th of October 2013", (2013, 10, 10)),
            ("10th of October, 2013", (2013, 10, 10)),
            ("2013-10-10", (2013, 10, 10)),
            ("2013/10/10", (2013, 10, 10)),
            ("August, 2013", (2013, 8, None)),
            ("2013", (2013, None, None)),
            ("November 04, 2013", (2013, 11, 4)),
            ("November 4, 2013", (2013, 11, 4)),
        )
        text = " bla bli 123. Bu \n pouet12 \n 12412 ".join([entry[0] for entry in dates])
        refs = CollectArticles.retrieve_referenced_dates(text)
        # Tracks extracted strings that no sample has claimed yet.
        date_found = [item['extracted_date'] for item in refs]
        for wanted_text, wanted_date in dates:
            # next() with a default replaces filter(...)[0] inside a bare
            # except: it works on Python 3 and does not disguise unrelated
            # errors as a missing date.
            ref = next((item for item in refs if item["extracted_date"] == wanted_text), None)
            if ref is None:
                raise Exception("\"%s\" not found in document. Date found:\n%s" % (wanted_text, "\n".join(date_found)))
            assert ref['extracted_date'] in wanted_text
            assert ref['date'] == wanted_date, "%s != %s" % (ref['date'], wanted_date)
            date_found.remove(ref['extracted_date'])
        assert len(refs) == len(dates), "%s != %s\nToo much : %s" % (len(refs), len(dates), date_found)