Пример #1
0
class TestOperations(unittest.TestCase):
    """Tests for the ``CollectArticles`` operation.

    Covers a plain collection run, a run whose storage is swapped for a
    dedicated test database, a run submitted through the worker, and the
    extraction of dates referenced in free text.
    """

    def setUp(self):
        """Create a ``Storage`` pointing at a test variant of the configured URI.

        The last "/"-separated segment of ``settings.MONGODB_URI`` is taken
        as the database name and prefixed with ``test``.
        """
        base_uri, _, original_db = settings.MONGODB_URI.rpartition("/")
        self.test_db = "test" + original_db
        self.testing_storage = Storage(uri=base_uri + "/" + self.test_db)

    def tearDown(self):
        """Drop the database named ``self.test_db``."""
        # NOTE(review): this goes through a default Storage() connection, not
        # self.testing_storage -- presumably both reach the same server; confirm.
        Storage().get_connection().drop_database(self.test_db)

    def _assert_collected(self, collector, results):
        """Check that *results* is non-empty and agrees with the collector report.

        Shared by the collection tests: every result must carry ``ref_dates``
        and the report's ``meta`` counters must be consistent with the number
        of results.
        """
        # print("") / "%"-formatting emit the same text on Python 2 and 3.
        print("")
        print("results: %s" % len(results))
        self.assertGreater(len(results), 0)
        for result in results:
            self.assertTrue(result.ref_dates,
                            "%s : %s" % (result, result.url))
        report = collector.get_report()
        self.assertTrue(report)
        self.assertEqual(report.collector,
                         "brokenpromises.operations.CollectArticles",
                         report.collector)
        self.assertEqual(report.meta['count'], len(results))
        self.assertLessEqual(report.meta['related_articles'], len(results))
        self.assertEqual(len(report.meta['urls_found']), len(results))

    def test_get_articles(self):
        """Run a collector on the guardian channel and check results and report."""
        collector = CollectArticles(("brokenpromises.channels.guardian", ),
                                    "2014", "1")
        results = collector.run()
        self._assert_collected(collector, results)

    def test_get_articles_with_storage(self):
        """Run a storage-backed collector twice and check the stored reports.

        After the first run exactly one "done" collector report must exist for
        the searched date; after the second run there must be two reports, one
        of them with status "escaped".
        """
        from brokenpromises import Article
        searched_date = (2014, 1, None)
        collector = CollectArticles(("brokenpromises.channels.nytimes", ),
                                    *searched_date,
                                    use_storage=True)
        # replace storage with custom storage (testing db)
        collector.storage = self.testing_storage
        results = collector.run()
        self._assert_collected(collector, results)
        # Query once and reuse the list as the failure message, so the message
        # shows exactly what was counted.
        done_reports = self.testing_storage.get_reports(
            name="collector", searched_date=searched_date, status="done")
        self.assertEqual(len(done_reports), 1, done_reports)

        # Second run with the same collector and storage.
        results = collector.run()
        self.assertGreater(len(results), 0, results)
        self.assertIs(type(results[0]), Article, type(results[0]))
        self.assertEqual(
            len(self.testing_storage.get_reports(searched_date=searched_date)),
            2)
        self.assertEqual(
            len(self.testing_storage.get_reports(name="collector",
                                                 searched_date=searched_date)),
            2)
        self.assertEqual(
            len(self.testing_storage.get_reports(name="collector",
                                                 searched_date=searched_date,
                                                 status="escaped")),
            1)

    def test_get_articles_with_queue(self):
        """Hand a collector to the worker; the test passes if nothing raises."""
        # need to explicitly import the runnable object
        from brokenpromises.operations import CollectArticles
        from brokenpromises.worker import worker
        collector = CollectArticles(("brokenpromises.channels.guardian", ),
                                    "2014",
                                    1,
                                    use_storage=False)
        worker.run(collector)

    def test_retrieve_referenced_dates(self):
        """Check that every supported date spelling is extracted and parsed.

        Each entry pairs the text as written with the expected
        ``(year, month, day)`` tuple; missing parts are ``None``.
        """
        dates = (
            ("10 October 2013", (2013, 10, 10)),
            ("10 october, 2013", (2013, 10, 10)),
            ("4 by October 2013", (2013, 10, 4)),
            ("10 by October 2013", (2013, 10, 10)),
            ("10 by October, 2013", (2013, 10, 10)),
            ("Jan 2014", (2014, 1, None)),
            ("10 in October 2013", (2013, 10, 10)),
            ("10 in October, 2013", (2013, 10, 10)),
            ("10 of October 2013", (2013, 10, 10)),
            ("10 of October, 2013", (2013, 10, 10)),
            ("10th October 2013", (2013, 10, 10)),
            ("10th by October 2013", (2013, 10, 10)),
            ("10th by October, 2013", (2013, 10, 10)),
            ("10th in october 2013", (2013, 10, 10)),
            ("10th in October, 2013", (2013, 10, 10)),
            ("10th of October 2013", (2013, 10, 10)),
            ("10th of October, 2013", (2013, 10, 10)),
            ("2013-10-10", (2013, 10, 10)),
            ("2013/10/10", (2013, 10, 10)),
            ("August, 2013", (2013, 8, None)),
            ("2013", (2013, None, None)),
            ("November 04, 2013", (2013, 11, 4)),
            ("November 4, 2013", (2013, 11, 4)),
        )

        # Glue all samples into one document separated by filler text.
        text = " bla bli 123. Bu \n pouet12 \n 12412 ".join(
            [written for written, _ in dates])
        refs = CollectArticles.retrieve_referenced_dates(text)
        # Extracted strings not yet matched to a sample; whatever is left at
        # the end was extracted in excess.
        unmatched = [ref['extracted_date'] for ref in refs]
        for written, expected in dates:
            # First reference whose extracted text equals the sample exactly.
            ref = next(
                (r for r in refs if r["extracted_date"] == written), None)
            if ref is None:
                self.fail(
                    "\"%s\" not found in document. Date found:\n%s" %
                    (written, "\n".join(unmatched)))
            self.assertIn(ref['extracted_date'], written)
            self.assertEqual(ref['date'], expected,
                             "%s != %s" % (ref['date'], expected))
            unmatched.remove(ref['extracted_date'])
        self.assertEqual(
            len(refs), len(dates), "%s != %s\nToo much : %s" %
            (len(refs), len(dates), unmatched))
Пример #2
0
class TestOperations(unittest.TestCase):
	"""Tests for the ``CollectArticles`` operation.

	Covers a plain collection run, a run whose storage is swapped for a
	dedicated test database, a run submitted through the worker, and the
	extraction of dates referenced in free text.
	"""

	def setUp(self):
		"""Create a ``Storage`` pointing at a test variant of the configured URI.

		The last "/"-separated segment of ``settings.MONGODB_URI`` is taken
		as the database name and prefixed with ``test``.
		"""
		base_uri, _, original_db = settings.MONGODB_URI.rpartition("/")
		self.test_db         = "test" + original_db
		self.testing_storage = Storage(uri=base_uri + "/" + self.test_db)

	def tearDown(self):
		"""Drop the database named ``self.test_db``."""
		# NOTE(review): this goes through a default Storage() connection, not
		# self.testing_storage -- presumably both reach the same server; confirm.
		Storage().get_connection().drop_database(self.test_db)

	def _assert_collected(self, collector, results):
		"""Check that *results* is non-empty and agrees with the collector report.

		Shared by the collection tests: every result must carry ``ref_dates``
		and the report's ``meta`` counters must be consistent with the number
		of results.
		"""
		# print("") / "%"-formatting emit the same text on Python 2 and 3.
		print("")
		print("results: %s" % len(results))
		self.assertGreater(len(results), 0)
		for result in results:
			self.assertTrue(result.ref_dates, "%s : %s" % (result, result.url))
		report = collector.get_report()
		self.assertTrue(report)
		self.assertEqual(report.collector, "brokenpromises.operations.CollectArticles", report.collector)
		self.assertEqual(report.meta['count'], len(results))
		self.assertLessEqual(report.meta['related_articles'], len(results))
		self.assertEqual(len(report.meta['urls_found']), len(results))

	def test_get_articles(self):
		"""Run a collector on the guardian channel and check results and report."""
		collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", "1")
		results   = collector.run()
		self._assert_collected(collector, results)

	def test_get_articles_with_storage(self):
		"""Run a storage-backed collector twice and check the stored reports.

		After the first run exactly one "done" collector report must exist for
		the searched date; after the second run there must be two reports, one
		of them with status "escaped".
		"""
		from brokenpromises import Article
		searched_date = (2014, 1, None)
		collector = CollectArticles(("brokenpromises.channels.nytimes",), *searched_date, use_storage=True)
		# replace storage with custom storage (testing db)
		collector.storage = self.testing_storage
		results           = collector.run()
		self._assert_collected(collector, results)
		# Query once and reuse the list as the failure message, so the message
		# shows exactly what was counted.
		done_reports = self.testing_storage.get_reports(name="collector", searched_date=searched_date, status="done")
		self.assertEqual(len(done_reports), 1, done_reports)

		# Second run with the same collector and storage.
		results = collector.run()
		self.assertGreater(len(results), 0, results)
		self.assertIs(type(results[0]), Article, type(results[0]))
		self.assertEqual(len(self.testing_storage.get_reports(searched_date=searched_date)), 2)
		self.assertEqual(len(self.testing_storage.get_reports(name="collector", searched_date=searched_date)), 2)
		self.assertEqual(len(self.testing_storage.get_reports(name="collector", searched_date=searched_date, status="escaped")), 1)

	def test_get_articles_with_queue(self):
		"""Hand a collector to the worker; the test passes if nothing raises."""
		# need to explicitly import the runnable object
		from brokenpromises.operations import CollectArticles
		from brokenpromises.worker     import worker
		collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", 1, use_storage=False)
		worker.run(collector)

	def test_retrieve_referenced_dates(self):
		"""Check that every supported date spelling is extracted and parsed.

		Each entry pairs the text as written with the expected
		``(year, month, day)`` tuple; missing parts are ``None``.
		"""
		dates = (
			("10 October 2013"       , (2013, 10, 10)),
			("10 october, 2013"      , (2013, 10, 10)),
			("4 by October 2013"     , (2013, 10, 4)),
			("10 by October 2013"    , (2013, 10, 10)),
			("10 by October, 2013"   , (2013, 10, 10)),
			("Jan 2014"              , (2014, 1, None)),
			("10 in October 2013"    , (2013, 10, 10)),
			("10 in October, 2013"   , (2013, 10, 10)),
			("10 of October 2013"    , (2013, 10, 10)),
			("10 of October, 2013"   , (2013, 10, 10)),
			("10th October 2013"     , (2013, 10, 10)),
			("10th by October 2013"  , (2013, 10, 10)),
			("10th by October, 2013" , (2013, 10, 10)),
			("10th in october 2013"  , (2013, 10, 10)),
			("10th in October, 2013" , (2013, 10, 10)),
			("10th of October 2013"  , (2013, 10, 10)),
			("10th of October, 2013" , (2013, 10, 10)),
			("2013-10-10"            , (2013, 10, 10)),
			("2013/10/10"            , (2013, 10, 10)),
			("August, 2013"          , (2013, 8, None)),
			("2013"                  , (2013, None, None)),
			("November 04, 2013"     , (2013, 11, 4)),
			("November 4, 2013"      , (2013, 11, 4)),
		)

		# Glue all samples into one document separated by filler text.
		text = " bla bli 123. Bu \n pouet12 \n 12412 ".join([written for written, _ in dates])
		refs = CollectArticles.retrieve_referenced_dates(text)
		# Extracted strings not yet matched to a sample; whatever is left at
		# the end was extracted in excess.
		unmatched = [ref['extracted_date'] for ref in refs]
		for written, expected in dates:
			# First reference whose extracted text equals the sample exactly.
			ref = next((r for r in refs if r["extracted_date"] == written), None)
			if ref is None:
				self.fail("\"%s\" not found in document. Date found:\n%s" % (written, "\n".join(unmatched)))
			self.assertIn(ref['extracted_date'], written)
			self.assertEqual(ref['date'], expected, "%s != %s" % (ref['date'], expected))
			unmatched.remove(ref['extracted_date'])
		self.assertEqual(len(refs), len(dates), "%s != %s\nToo much : %s" % (len(refs), len(dates), unmatched))