Пример #1
0
    def test_with(self):
        """Write text to a MockTarget and read it back via ``with`` blocks."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write("bar")

        with target.open('r') as reader:
            lines = list(reader)
        self.assertEqual(lines, ['bar'])
Пример #2
0
    def test_preprocessing(self, output_mock, input_mock):
        """Run the corpus preprocessing task on a small pickled corpus.

        Five documents are pickled into the mocked input target; the test
        expects the task to emit exactly two documents with known tokens.
        """
        # -------- SET UP MOCK DATA ------------
        input_target = MockTarget('corpus_in', format=luigi.format.Nop)
        output_target = MockTarget('corpus_out', format=luigi.format.Nop)
        input_mock.return_value = input_target
        output_mock.return_value = output_target

        raw_corpus = [
            Doc("Ich bin der erste Post über ein Kulturinstitut"
                "in der Landeshauptstadt"),
            Doc("Ich bin der 2 Post über mit Bezug zur "
                "Landeshauptstadt. toll "),
            Doc("Trallala noch ein Post 2 zum Museum"),
            Doc("noch weitere Posts zum weitere testen."
                'Barberini toll'),
            Doc("this document is in english"),
        ]
        with input_target.open('w') as corpus_file:
            pickle.dump(raw_corpus, corpus_file)

        # ------- RUN TASK UNDER TEST --------
        TopicModelingPreprocessCorpus().run()

        # ------- INSPECT OUTPUT -------
        with output_target.open("r") as result_file:
            processed = pickle.load(result_file)  # nosec
        self.assertEqual(len(processed), 2)
        self.assertEqual(processed[0].tokens,
                         ['post', 'landeshauptstadt', 'toll'])
        self.assertEqual(processed[1].tokens,
                         ['weitere', 'weitere', 'toll'])
Пример #3
0
    def test_bytes(self):
        """Round-trip raw bytes through a MockTarget using the Nop format."""
        target = MockTarget("foo", format=Nop)
        with target.open('wb') as writer:
            writer.write(b"bar")

        with target.open('rb') as reader:
            chunks = list(reader)
        self.assertEqual(chunks, [b'bar'])
Пример #4
0
    def test_default_mode_value(self):
        """Calling ``open()`` without a mode must allow reading the data."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write("bar")

        # No mode argument on purpose: the default is what is under test
        with target.open() as reader:
            lines = list(reader)
        self.assertEqual(lines, ['bar'])
Пример #5
0
    def test_bytes(self):
        """Round-trip raw bytes through a MockTarget using the Nop format."""
        target = MockTarget("foo", format=Nop)
        with target.open('wb') as writer:
            writer.write(b"bar")

        with target.open('rb') as reader:
            chunks = list(reader)
        self.assertEqual(chunks, [b'bar'])
Пример #6
0
    def test_with(self):
        """Write text to a MockTarget and read it back via ``with`` blocks."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write("bar")

        with target.open('r') as reader:
            lines = list(reader)
        self.assertEqual(lines, ['bar'])
Пример #7
0
    def test_unicode(self):
        """Write a unicode literal and expect ``read()`` to return it."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write(u"bar")

        with target.open('r') as reader:
            content = reader.read()
        self.assertEqual(content, u'bar')
Пример #8
0
    def test_default_mode_value(self):
        """Calling ``open()`` without a mode must allow reading the data."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write("bar")

        # No mode argument on purpose: the default is what is under test
        with target.open() as reader:
            lines = list(reader)
        self.assertEqual(lines, ['bar'])
Пример #9
0
    def test_unicode(self):
        """Write a unicode literal and expect ``read()`` to return it."""
        target = MockTarget("foo")
        with target.open('w') as writer:
            writer.write(u"bar")

        with target.open('r') as reader:
            content = reader.read()
        self.assertEqual(content, u'bar')
Пример #10
0
    def test_post_performance_transformation(self, input_mock, output_mock,
                                             request_mock):
        """Check FetchIgPostPerformance output against the expected CSV.

        Seeds the database with two posts and one performance row, feeds the
        task a posts CSV plus two mocked insight responses, and compares the
        produced CSV with ``post_insights_expected.csv`` as data frames.
        """
        def read_fixture(file_name):
            # Load a UTF-8 encoded Instagram test data file as one string.
            with open(f'{IG_TEST_DATA}/{file_name}',
                      'r',
                      encoding='utf-8') as fixture_file:
                return fixture_file.read()

        self.db_connector.execute(
            '''INSERT INTO ig_post (ig_post_id) VALUES
                (0123456789),
                (9876543210)''', '''INSERT INTO ig_post_performance VALUES
                (0123456789, '2019-10-04', 5, 4, 3, 2, 1, 2, 1, 0, 1, 0)''')

        posts_target = MockTarget('posts_in', format=UTF8)
        input_mock.return_value = posts_target
        insights_target = MockTarget('insights_out', format=UTF8)
        output_mock.return_value = insights_target

        with posts_target.open('w') as posts_file:
            posts_file.write(read_fixture('post_expected.csv'))

        video_insights = read_fixture('post_insights_video_actual.json')
        no_video_insights = read_fixture(
            'post_insights_no_video_actual.json')

        with open(f'{IG_TEST_DATA}/post_insights_expected.csv',
                  'r',
                  encoding='utf-8') as expected_file:
            expected_df = pd.read_csv(expected_file)

        # Successive calls to the request mock return these responses in
        # order: first the video insights, then the non-video insights.
        request_mock.side_effect = [
            MagicMock(ok=True, json=lambda: json.loads(video_insights)),
            MagicMock(ok=True, json=lambda: json.loads(no_video_insights)),
        ]

        with freeze_time('2020-01-01 00:00:05'):
            column_names = [
                column[0]
                for column in instagram.IgPostPerformanceToDb().columns
            ]
            self.task = instagram.FetchIgPostPerformance(
                columns=column_names,
                timespan=dt.timedelta(days=100000),
                table='ig_post_performance')
            self.task.run()

        with insights_target.open('r') as insights_file:
            actual_df = pd.read_csv(insights_file)
        pd.testing.assert_frame_equal(expected_df, actual_df)
Пример #11
0
    def test_1(self):
        """Write via ``print`` and read back, closing handles explicitly."""
        target = MockTarget('test')
        writer = target.open('w')
        print('test', file=writer)
        writer.close()

        reader = target.open('r')
        self.assertEqual(list(reader), ['test\n'])
        reader.close()
Пример #12
0
    def test_1(self):
        """Write via ``print`` and read back, closing handles explicitly."""
        target = MockTarget('test')
        writer = target.open('w')
        print('test', file=writer)
        writer.close()

        reader = target.open('r')
        self.assertEqual(list(reader), ['test\n'])
        reader.close()
    def test_scrape_order_contains(self, output_mock, input_mock):
        """Scrape order contents and compare them row by row via hashes.

        For every distinct order id in the test data an HTML page is fetched;
        the scraper output must have as many rows as the test data, keep the
        order ids in the same sequence, and hash to the stored expectations.
        """
        expected_rows = pd.read_csv(
            'tests/test_data/gomus/scrape_order_contains_data.csv')

        # Fetch one HTML page per distinct order and remember its path
        html_paths = []
        for order_id in expected_rows['order_id'].drop_duplicates():
            fetch_task = FetchGomusHTML(url=f"/admin/orders/{order_id}")
            fetch_task.run()
            html_paths.append(fetch_task.output().path)

        input_target = MockTarget('orders_htmls_out', format=UTF8)
        input_mock.return_value = input_target
        with input_target.open('w') as paths_file:
            paths_file.write('\n'.join(html_paths))

        output_target = MockTarget('scraped_order_contains_out', format=UTF8)
        output_mock.return_value = output_target

        # -- execute code under test --
        ScrapeGomusOrderContains().run()

        # -- inspect results --
        with output_target.open('r') as scraped_file:
            scraped_rows = pd.read_csv(scraped_file)

        self.assertEqual(len(expected_rows.index), len(scraped_rows.index))

        # Columns that are joined (in this order) to build the hash input
        hashed_columns = ('article_id', 'article_type', 'ticket', 'date',
                          'quantity', 'price', 'is_cancelled')
        for position in range(len(scraped_rows)):
            expected_row = expected_rows.iloc[position]
            scraped_row = scraped_rows.iloc[position]

            # test if order stayed the same
            self.assertEqual(expected_row['order_id'],
                             scraped_row['order_id'])

            # test if scraped data is correct
            hash_input = ','.join(
                str(scraped_row[column]) for column in hashed_columns)
            scraped_hash = mmh3.hash(hash_input, seed=self.hash_seed)
            # The row content is only included when stdin is a terminal
            shown_row = (str(scraped_row) if sys.stdin.isatty()
                         else 'REDACTED ON NON-TTY')
            self.assertEqual(scraped_hash,
                             expected_row['expected_hash'],
                             msg=f"Scraper got wrong values:\n{shown_row}")
Пример #14
0
    def test_exhibition_times(self, output_mock, requests_get_mock):
        """Compare the FetchExhibitionTimes output with the expected CSV."""
        result_target = MockTarget('exhibition_out', format=UTF8)
        output_mock.return_value = result_target

        fixture_dir = 'tests/test_data/gomus/exhibitions'
        with open(f'{fixture_dir}/exhibitions_actual.json',
                  'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{fixture_dir}/exhibition_times_expected.csv',
                  'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Overwrite requests 'get' return value to provide our test data
        requests_get_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        FetchExhibitionTimes().run()

        with result_target.open('r') as result_file:
            self.assertEqual(expected_csv, result_file.read())
Пример #15
0
    def test_empty_tweet_performance(self, output_mock, raw_tweets_mock):
        """Run ExtractTweetPerformance on the empty raw tweets fixture.

        The complete task output must equal ``empty_tweet_performance.csv``.
        """
        result_target = MockTarget('perform_extracted_out', format=UTF8)
        output_mock.return_value = result_target

        fixture_dir = 'tests/test_data/twitter'
        with open(f'{fixture_dir}/empty_raw_tweets.csv',
                  'r',
                  encoding='utf-8') as tweets_file:
            raw_tweets = tweets_file.read()
        with open(f'{fixture_dir}/empty_tweet_performance.csv',
                  'r',
                  encoding='utf-8') as performance_file:
            expected_performance = performance_file.read()

        self.install_mock_target(raw_tweets_mock,
                                 lambda file: file.write(raw_tweets))

        ExtractTweetPerformance(table='tweet_performance').run()

        with result_target.open('r') as result_file:
            actual_performance = result_file.read()
        self.assertEqual(actual_performance, expected_performance)
Пример #16
0
    def test_extract_tweet_performance(self, output_mock, raw_tweets_mock):
        """Run ExtractTweetPerformance on raw tweets known to the database.

        The header line must match exactly; for data lines 1 and 2 the last
        ``;``-separated field (the timestamp) is ignored in the comparison.
        """
        self.db_connector.execute('''
            INSERT INTO tweet (tweet_id) VALUES
                ('1234567890123456789'),
                ('111111111111111111'),
                ('2222222222222222222')
            ''')
        result_target = MockTarget('perform_extracted_out', format=UTF8)
        output_mock.return_value = result_target

        fixture_dir = 'tests/test_data/twitter'
        with open(f'{fixture_dir}/raw_tweets.csv',
                  'r',
                  encoding='utf-8') as tweets_file:
            raw_tweets = tweets_file.read()
        with open(f'{fixture_dir}/expected_tweet_performance.csv',
                  'r',
                  encoding='utf-8') as performance_file:
            expected_performance = performance_file.read()

        self.install_mock_target(raw_tweets_mock,
                                 lambda file: file.write(raw_tweets))

        ExtractTweetPerformance(table='tweet_performance').run()

        with result_target.open('r') as result_file:
            actual_lines = result_file.read().split('\n')
        expected_lines = expected_performance.split('\n')

        self.assertEqual(actual_lines[0], expected_lines[0])
        for line_number in range(1, 3):
            # cutting away the timestamp
            self.assertEqual(
                actual_lines[line_number].split(';')[:-1],
                expected_lines[line_number].split(';')[:-1])
    def test_post_transformation(self, fact_mock, output_mock,
                                 requests_get_mock):
        """Compare the FetchFbPosts output with ``post_expected.csv``."""
        facts_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts_target
        posts_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = posts_target

        with open(f'{FB_TEST_DATA}/post_actual.json', 'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{FB_TEST_DATA}/post_expected.csv', 'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Overwrite requests 'get' return value to provide our test data
        requests_get_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        facebook.MuseumFacts().run()
        facebook.FetchFbPosts().run()

        with posts_target.open('r') as posts_file:
            self.assertEqual(expected_csv, posts_file.read())
Пример #18
0
    def test_audience_gender_age_transformation(self, input_mock, output_mock,
                                                request_mock):
        """Compare the FetchIgAudienceGenderAge output with the fixture CSV.

        The request mock answers with ``audience_gender_age_actual.json``;
        the task runs with the time frozen at 2020-01-01 00:00:05.
        """
        facts_target = MockTarget('facts_in', format=UTF8)
        input_mock.return_value = facts_target
        result_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = result_target

        with open(f'{IG_TEST_DATA}/audience_gender_age_actual.json',
                  'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{IG_TEST_DATA}/audience_gender_age_expected.csv',
                  'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        instagram.MuseumFacts().run()
        with freeze_time('2020-01-01 00:00:05'):
            column_names = [
                col[0] for col in instagram.IgAudienceGenderAgeToDb().columns
            ]
            instagram.FetchIgAudienceGenderAge(columns=column_names).run()

        with result_target.open('r') as result_file:
            self.assertEqual(result_file.read(), expected_csv)
Пример #19
0
    def test_create_corpus(self, output_mock):
        """Build a corpus from one tweet and two Facebook comments.

        The pickled task output is expected to hold exactly two ``Doc``
        instances.
        """
        # -------- SET UP MOCK DATA ------------
        corpus_target = MockTarget('corpus_out', format=luigi.format.Nop)
        output_mock.return_value = corpus_target
        self.db_connector.execute(
            '''
                INSERT INTO tweet(user_id,tweet_id,text,response_to,post_date)
                VALUES ('user_id', 'tweet_id', 'tweet text', NULL,
                        '2020-05-24 10:56:21')
            ''', '''
                INSERT INTO fb_post_comment(post_id,comment_id,post_date,
                    text,is_from_museum,response_to)
                VALUES ('post1','comment1','2020-05-24 10:56:21',
                        'text1',false,NULL),
                    ('post2','comment2','2018-05-24 10:56:21',
                        'text2',true,NULL)
            ''')

        # ------- RUN TASK UNDER TEST --------
        TopicModelingCreateCorpus().run()

        # ------- INSPECT OUTPUT -------
        with corpus_target.open("r") as corpus_file:
            documents = pickle.load(corpus_file)  # nosec

        self.assertEqual(len(documents), 2)
        self.assertIsInstance(documents[0], Doc)
        self.assertIsInstance(documents[1], Doc)
Пример #20
0
    def test_fetch_total_profile_metrics(self, fact_mock, output_mock,
                                         request_mock):
        """Compare the FetchIgTotalProfileMetrics output with the fixture.

        The request mock answers with ``total_profile_metrics_actual.json``;
        the task runs with the time frozen at 2020-01-01 00:00:05.
        """
        facts_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts_target
        result_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = result_target

        with open(f'{IG_TEST_DATA}/total_profile_metrics_actual.json',
                  'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{IG_TEST_DATA}/total_profile_metrics_expected.csv',
                  'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        with freeze_time('2020-01-01 00:00:05'):
            column_names = [
                col[0]
                for col in instagram.IgTotalProfileMetricsToDb().columns
            ]
            self.run_task(
                instagram.FetchIgTotalProfileMetrics(columns=column_names))

        with result_target.open('r') as result_file:
            self.assertEqual(result_file.read(), expected_csv)
Пример #21
0
    def test_two(self, input_mock, output_mock):
        """Concatenate two CSV inputs and expect their rows back in order.

        Each input data frame is written to its own mock target; the output
        of ConcatCsvs must equal the two frames stacked on top of each other.
        """
        df_in0 = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])
        df_in1 = pd.DataFrame([[42, 'spam'], [1337, 'häm']],
                              columns=['a', 'b'])
        input_mock.return_value = [
            self.install_mock_target(
                MagicMock(),
                # Bind df as a default argument: a plain closure would look
                # the loop variable up only when called, so a deferred call
                # would write the last frame into every target.
                lambda file, df=df: df.to_csv(file, index=False))
            for df in [df_in0, df_in1]
        ]
        output_target = MockTarget(str(self), format=UTF8)
        output_mock.return_value = output_target

        self.task = ConcatCsvs()
        self.run_task(self.task)
        with output_target.open('r') as output:
            df_out = pd.read_csv(output)

        df_expected = pd.DataFrame(
            [
                [1, 'foo'],
                [2, 'bar'],
                [42, 'spam'],
                [1337, 'häm'],
            ],
            columns=['a', 'b'])
        pd.testing.assert_frame_equal(df_expected, df_out)
Пример #22
0
    def test_fetch_twitter(self, output_mock):
        """Fetch tweets for a frozen date and look for one known tweet.

        The fetched rows are narrowed down field by field; after every
        field at least one matching row must remain.
        """
        raw_target = MockTarget('raw_out', format=UTF8)
        output_mock.return_value = raw_target

        # Dirty workaround for pandas's peculiarities regarding default values
        none = object()
        expected_tweet = {
            'tweet_id': 1225435275301654531,
            'text': "#MuseumBarberini is cool!",
            'user_id': 1189538451097608193,
            'parent_tweet_id': none,
            'timestamp': '2020-02-06 16:05:11+01:00'
        }

        with freeze_time('2020-02-06'):
            # On this day our team's account has posted a related tweet
            # See https://twitter.com/bpfn2020/status/1225435275301654531
            FetchTwitter(timespan=dt.timedelta(days=1)).run()

        with raw_target.open('r') as raw_file:
            fetched_df = pd.read_csv(raw_file)
        fetched_df = fetched_df.fillna(none)

        candidates = fetched_df
        for key, value in expected_tweet.items():
            candidates = candidates[candidates[key] == value]
            self.assertTrue(
                len(candidates) >= 1,
                f"Did not find any tweet with {key} = {value}")
Пример #23
0
    def test_app_id(self, input_mock):
        """Expect ``app_id`` to equal the appId written to the mocked input."""
        facts_target = MockTarget('museum_facts', format=UTF8)
        input_mock.return_value = facts_target
        museum_facts = {'ids': {'gplay': {'appId': 'some ID'}}}
        with facts_target.open('w') as facts_file:
            json.dump(museum_facts, facts_file)
        # NOTE(review): presumably clears a cached id on the task — confirm
        self.task._app_id = None

        self.assertEqual(FetchGplayReviews().app_id, 'some ID')
Пример #24
0
    def test_run(self, input_mock, output_mock, mock_fetch, mock_lang):
        """Run FetchGplayReviews and compare its CSV with the expected row."""
        facts_target = MockTarget('museum_facts', format=UTF8)
        input_mock.return_value = facts_target
        museum_facts = {
            'ids': {
                'gplay': {'appId': 'com.barberini.museum.barberinidigital'},
            },
        }
        with facts_target.open('w') as facts_file:
            json.dump(museum_facts, facts_file)
        reviews_target = MockTarget('gplay.gplay_reviews', format=UTF8)
        output_mock.return_value = reviews_target

        FetchGplayReviews().run()

        expected_df = pd.DataFrame([RESPONSE_ELEM_1_RENAMED_COLS])
        with reviews_target.open('r') as reviews_file:
            actual_df = pd.read_csv(reviews_file)

        pd.testing.assert_frame_equal(expected_df, actual_df)
Пример #25
0
    def test_one(self, input_mock, output_mock):
        """With a single CSV input, ConcatCsvs must reproduce that frame."""
        df_in = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])
        self.install_mock_target(
            input_mock,
            lambda file: df_in.to_csv(file, index=False))
        result_target = MockTarget(str(self))
        output_mock.return_value = result_target

        self.task = ConcatCsvs()
        self.run_task(self.task)
        with result_target.open('r') as result_file:
            df_out = pd.read_csv(result_file)

        pd.testing.assert_frame_equal(df_in, df_out)
Пример #26
0
class GomusFormatTest(DatabaseTestCase):
    """The abstract base class for gomus format tests.

    A concrete test supplies the report to fetch and the expected format,
    a sequence whose entries hold the column name at index 0 and the
    expected type name ('FLOAT', 'DATE', 'TIME', ...) at index 1.
    """

    def __init__(self, report, expected_format, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.report = report
        self.expected_format = expected_format

    def prepare_output_target(self, output_mock):
        """Install a fresh mock target as the only output of the mock."""
        self.output_target = MockTarget('data_out', format=UTF8)
        output_mock.return_value = [self.output_target]

    def fetch_gomus_report(self, suffix='_7days', sheet=None):
        """Run FetchGomusReport for this test's report.

        ``sheet`` defaults to ``[0]``. A ``None`` sentinel is used instead
        of a list literal so that no list object is shared between calls.
        """
        if sheet is None:
            sheet = [0]
        self.run_task(
            FetchGomusReport(report=self.report,
                             suffix=suffix,
                             sheet_indices=sheet))

    def check_format(self, skiprows=0, skipfooter=0):
        """Check column names and value types of the fetched report.

        Checking stops at the first column named 'Keine Daten vorhanden'.
        """
        with self.output_target.open('r') as output_file:
            df = pd.read_csv(output_file,
                             skipfooter=skipfooter,
                             skiprows=skiprows,
                             engine='python')

            for i, column_format in enumerate(self.expected_format):
                if df.columns[i] == 'Keine Daten vorhanden':
                    break
                column_name = column_format[0]
                column_type = column_format[1]
                # this checks whether the columns are named right
                self.assertEqual(df.columns[i], column_name)
                # The lambda is called right away by apply(), so it sees
                # the values of the current iteration.
                df.apply(lambda row: self.check_type(row[column_name],
                                                     column_type),
                         axis=1)

    def check_type(self, data, expected_type):
        """Fail the test if ``data`` cannot be parsed as ``expected_type``.

        Empty strings are accepted for every type. Type names other than
        'FLOAT', 'DATE' and 'TIME' are not checked.
        """
        # To check if the data in the columns has the right type,
        # we try to convert the string into the expected type and
        # catch a ValueError or TypeError, if something goes wrong.
        # As we don't process data from type "STRING"/ just store
        # it as text, we don't have to explicitly check the type.
        try:
            if data == '':
                pass
            elif expected_type == 'FLOAT':
                float(data)
            elif expected_type == 'DATE':
                dt.datetime.strptime(data, '%d.%m.%Y')
            elif expected_type == 'TIME':
                dt.datetime.strptime(data, '%H:%M')
        except (ValueError, TypeError):
            self.fail(f'{data} is not from type {expected_type}')
class DummyFileWrapper(luigi.Task):
    """Dummy task that writes its ``csv`` parameter to a mock target."""

    # Text that run() writes into the mock target
    csv = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The task's hash is part of the target name
        target_name = f'DummyFileWrapperMock{hash(self)}'
        self.mock_target = MockTarget(target_name, format=luigi.format.UTF8)

    def output(self):
        return self.mock_target

    def run(self):
        with self.mock_target.open('w') as output_file:
            output_file.write(self.csv)
Пример #28
0
    def test_post_transformation(self, fact_mock, output_mock, request_mock):
        """Compare the FetchIgPosts output with the fixture as data frames."""
        facts_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts_target
        posts_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = posts_target

        with open(f'{IG_TEST_DATA}/post_actual.json', 'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        expected_df = pd.read_csv(f'{IG_TEST_DATA}/post_expected.csv')

        def fake_request(url):
            # Every request gets a fresh response carrying the fixture
            return MagicMock(ok=True, json=lambda: json.loads(api_payload))

        request_mock.side_effect = fake_request

        self.run_task(instagram.FetchIgPosts())

        with posts_target.open('r') as posts_file:
            actual_df = pd.read_csv(posts_file)
        pd.testing.assert_frame_equal(actual_df, expected_df)
Пример #29
0
    def iter_task(self, mock_codes, max_index):
        """Drive the generator returned by ``self.task.run()`` step by step.

        For every status code in ``mock_codes`` the currently requested
        dependency must be a FetchGomusHTML and its position must not exceed
        ``max_index``. The generator is then resumed with a FailableTarget
        wrapping a mock target: a regular HTML name for 2xx codes, an
        ``.error`` name for codes the dependency ignores. Any other code
        raises ValueError. At the end no dependency may be left pending.
        """
        runner = self.task.run()
        requested = next(runner)
        for index, status_code in enumerate(mock_codes):
            self.assertIsInstance(requested, FetchGomusHTML)
            self.assertLessEqual(index, max_index)

            if 200 <= status_code < 300:
                target_name = f'quota_{index}.html'
            elif status_code in requested.ignored_status_codes:
                target_name = f'quota_{index}.html.error'
            else:
                raise ValueError("Unhandled status code")
            fake_target = MockTarget(target_name)
            # Open and close the target for writing without any content
            with fake_target.open('w'):
                pass

            try:
                requested = runner.send(FailableTarget(fake_target))
            except StopIteration:
                # The generator finished, nothing is requested any more
                requested = None
                break
        self.assertFalse(requested)
Пример #30
0
    def test_audience_origin_transformation(self, input_mock, output_mock,
                                            request_mock):
        """Compare the FetchIgAudienceOrigin output with the fixture CSV.

        The request mock answers with ``audience_origin_actual.json``; the
        task runs with the time frozen at 2020-01-01 00:00:05.
        """
        facts_target = MockTarget('facts_in', format=UTF8)
        input_mock.return_value = facts_target
        result_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = result_target

        with open(f'{IG_TEST_DATA}/audience_origin_actual.json',
                  'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{IG_TEST_DATA}/audience_origin_expected.csv',
                  'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        instagram.MuseumFacts().run()

        with freeze_time('2020-01-01 00:00:05'):
            # Use city mode for testing, though the
            # transformation is the same for countries
            # The only difference between the two is the received
            # data, which cannot be tested here
            column_names = [
                col[0] for col in instagram.IgAudienceCityToDb().columns
            ]
            instagram.FetchIgAudienceOrigin(columns=column_names,
                                            country_mode=False).run()

        with result_target.open('r') as result_file:
            self.assertEqual(result_file.read(), expected_csv)
    def prepare_post_performance_mocks(self, input_mock, output_mock,
                                       requests_get_mock, actual_json):
        """Wire up the input, output and request mocks for post performance.

        The input target receives the content of ``post_expected_single.csv``
        and the request mock returns a response whose ``json()`` yields the
        parsed content of the ``actual_json`` fixture file.

        Returns the mock target installed as the task output.
        """
        def read_fixture(file_name):
            # Load a UTF-8 encoded Facebook test data file as one string.
            with open(f'{FB_TEST_DATA}/{file_name}', 'r',
                      encoding='utf-8') as fixture_file:
                return fixture_file.read()

        posts_target = MockTarget('posts_in', format=UTF8)
        input_mock.return_value = posts_target
        insights_target = MockTarget('insights_out', format=UTF8)
        output_mock.return_value = insights_target

        with posts_target.open('w') as posts_file:
            posts_file.write(read_fixture('post_expected_single.csv'))

        api_payload = read_fixture(actual_json)
        requests_get_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        return insights_target
Пример #32
0
    def test_post_transformation(self, fact_mock, output_mock, request_mock):
        """Compare the FetchIgPosts output text with ``post_expected.csv``."""
        facts_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts_target
        posts_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = posts_target

        with open(f'{IG_TEST_DATA}/post_actual.json', 'r',
                  encoding='utf-8') as json_file:
            api_payload = json_file.read()
        with open(f'{IG_TEST_DATA}/post_expected.csv', 'r',
                  encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(api_payload))

        self.run_task(instagram.FetchIgPosts())

        with posts_target.open('r') as posts_file:
            self.assertEqual(posts_file.read(), expected_csv)
Пример #33
0
 def _touch(self, path):
     """Open a MockTarget at *path* for writing and close it unwritten."""
     target = MockTarget(path)
     with target.open('w'):
         pass
Пример #34
0
 def test_mode_none_error(self):
     """Expect TypeError when a target is opened with mode ``None``."""
     target = MockTarget("foo")
     with self.assertRaises(TypeError):
         with target.open(None) as handle:
             handle.write("bar")
Пример #35
0
 def _touch(self, path):
     """Open a MockTarget at *path* for writing and close it unwritten."""
     target = MockTarget(path)
     with target.open('w'):
         pass
Пример #36
0
 def test_mode_none_error(self):
     """Expect TypeError when a target is opened with mode ``None``."""
     target = MockTarget("foo")
     with self.assertRaises(TypeError):
         with target.open(None) as handle:
             handle.write("bar")
Пример #37
0
    def test_fetch_twitter(self, output_mock):
        """Integration test! We post a real tweet and then try to fetch it."""
        def collapse_whitespace(raw_text):
            # Replace every run of whitespace with a single blank
            return re.sub(r'\s+', ' ', raw_text)

        raw_target = MockTarget('raw_out', format=UTF8)
        output_mock.return_value = raw_target

        sample = "TestBarberiniAnalyticsFetchTwitter" \
            + self.generate_random_hex_string(12)
        text = (
            "This is an automated random tweet for integration testing of "
            "BarberiniAnalytics.\n\n"
            f"{sample}\n\n"
            "For more information, see: https://github.com/Museum-Barberini/"
            "Barberini-Analytics/blob/master/tests/test_twitter.py"
        )

        # ARRANGE
        tweet = self.post_tweet(text)
        time.sleep(3)  # Wait for the tweet to be processed

        # ACT
        FetchTwitter(
            query=sample,
            timespan=dt.timedelta(days=1)
        ).run()

        with raw_target.open('r') as raw_file:
            fetched_df = pd.read_csv(raw_file)
        # Dirty workaround for pandas's peculiarities regarding default values
        none = object()
        fetched_df = fetched_df.fillna(none)
        fetched_df['text'] = fetched_df['text'].apply(collapse_whitespace)
        fetched_df['timestamp'] = fetched_df['timestamp'].apply(
            date_parser.parse)

        # ASSERT
        expected_tweet = {
            'tweet_id': tweet['id'],
            'user_id': tweet['user_id'],
            'parent_tweet_id': none,
            'timestamp': tweet['created_at']
        }
        expected_text = collapse_whitespace(tweet['text'])
        # Only the part in front of the first ellipsis is compared
        expected_prefix = expected_text.split('…')[0]
        text_predicates = [
            lambda tweet_text: tweet_text.startswith(expected_prefix),
            lambda tweet_text: sample in tweet_text,
        ]

        candidates = fetched_df
        for key, value in expected_tweet.items():
            before_filter = candidates
            candidates = candidates[candidates[key] == value]
            self.assertTrue(
                len(candidates) >= 1,
                f"Did not find any tweet with {key} = {value}, "
                f"values are: {before_filter[key]}")
        for index, predicate in enumerate(text_predicates):
            before_filter = candidates
            candidates = candidates[candidates['text'].apply(predicate)]
            self.assertTrue(
                len(candidates) >= 1,
                f"Did not find any tweet with text matching predicate {index}"
                f"\n\nValues are: {before_filter['text']}")