Пример #1
0
    def test_ignore_post_save_signal(self):
        '''Saving while ignoring `post_save` should mute dataset signals.'''
        target = DatasetFactory()
        silenced = (Dataset.after_save, Dataset.on_update)
        save_options = {'ignores': ['post_save']}

        # `post_save` itself must still be emitted; only the dataset
        # signals listed above are expected to stay quiet.
        with assert_not_emit(*silenced), assert_emit(post_save):
            target.title = 'New title'
            target.save(signal_kwargs=save_options)
Пример #2
0
    def test_update_resource_missing_checksum_type(self):
        '''A resource checksum without type should fail validation.'''
        owner = UserFactory()
        broken = ResourceFactory()
        dataset = DatasetFactory(owner=owner, resources=[broken])
        # Invalidate the checksum only once the dataset has been created.
        broken.checksum.type = None

        with pytest.raises(db.ValidationError):
            dataset.update_resource(broken)
Пример #3
0
    def test_update_resource(self):
        '''Updating a resource should emit the save signals.'''
        owner = UserFactory()
        resource = ResourceFactory()
        dataset = DatasetFactory(owner=owner, resources=[resource])
        expected_signals = (post_save, Dataset.after_save, Dataset.on_update)

        resource.description = 'New description'

        with assert_emit(*expected_signals):
            dataset.update_resource(resource)

        # The update must not add a second entry for the same resource.
        assert len(dataset.resources) == 1
        updated = dataset.resources[0]
        assert updated.id == resource.id
        assert updated.description == 'New description'
Пример #4
0
    def test_add_resource_without_checksum(self):
        '''Resources without checksum should be added with save signals.'''
        owner = UserFactory()
        dataset = DatasetFactory(owner=owner)
        latest = ResourceFactory(checksum=None)
        expected_signals = (post_save, Dataset.after_save, Dataset.on_update)

        with assert_emit(*expected_signals):
            dataset.add_resource(ResourceFactory(checksum=None))
        assert len(dataset.resources) == 1

        with assert_emit(*expected_signals):
            dataset.add_resource(latest)
        assert len(dataset.resources) == 2
        # The resource added last is expected at the head of the list.
        assert dataset.resources[0].id == latest.id
Пример #5
0
    def test_attach(self):
        '''Every dataset listed in the CSV should be attached to the domain.'''
        datasets = DatasetFactory.create_batch(3)
        rows = [
            {'local': str(dataset.id), 'remote': str(position)}
            for position, dataset in enumerate(datasets)
        ]

        with NamedTemporaryFile() as csvfile:
            # NOTE(review): byte-string delimiter/quotechar look Python 2
            # specific -- confirm before porting this test.
            writer = csv.DictWriter(csvfile,
                                    fieldnames=['local', 'remote'],
                                    delimiter=b';',
                                    quotechar=b'"')
            writer.writeheader()
            writer.writerows(rows)
            # Flush so the content can be read back through the file name.
            csvfile.flush()

            result = actions.attach('test.org', csvfile.name)

        assert result.success == len(datasets)
        assert result.errors == 0
        for position, dataset in enumerate(datasets):
            dataset.reload()
            extras = dataset.extras
            assert extras['harvest:domain'] == 'test.org'
            assert extras['harvest:remote_id'] == str(position)
Пример #6
0
    def test_attach_skip_not_found(self):
        '''An unknown local identifier should be counted as a single error.'''
        datasets = DatasetFactory.create_batch(3)
        # First row targets an identifier that matches no dataset.
        rows = [{'local': 'not-found', 'remote': '42'}]
        rows.extend(
            {'local': str(dataset.id), 'remote': str(position)}
            for position, dataset in enumerate(datasets)
        )

        with NamedTemporaryFile() as csvfile:
            # NOTE(review): byte-string delimiter/quotechar look Python 2
            # specific -- confirm before porting this test.
            writer = csv.DictWriter(csvfile,
                                    fieldnames=['local', 'remote'],
                                    delimiter=b';',
                                    quotechar=b'"')
            writer.writeheader()
            writer.writerows(rows)
            # Flush so the content can be read back through the file name.
            csvfile.flush()

            result = actions.attach('test.org', csvfile.name)

        assert result.success == len(datasets)
        assert result.errors == 1
Пример #7
0
    def test_add_resource(self):
        '''Adding a resource should emit the save and resource signals.'''
        owner = UserFactory()
        dataset = DatasetFactory(owner=owner)
        latest = ResourceFactory()
        expected_signals = (
            post_save,
            Dataset.after_save,
            Dataset.on_update,
            Dataset.on_resource_added,
        )

        with assert_emit(*expected_signals):
            dataset.add_resource(ResourceFactory())
        assert len(dataset.resources) == 1

        with assert_emit(*expected_signals):
            dataset.add_resource(latest)
        assert len(dataset.resources) == 2

        # The resource added last is expected at the head of the list.
        head = dataset.resources[0]
        assert head.id == latest.id
        assert head.dataset == dataset
Пример #8
0
    def test_render_list_with_facets(self):
        '''It should render the list page with every created dataset.'''
        factory_options = dict(visible=True, org=True, geo=True)
        with self.autoindex():
            created = DatasetFactory.create_batch(3, **factory_options)

        list_url = url_for('datasets.list')
        response = self.get(list_url)
        self.assert200(response)

        rendered = self.get_context_variable('datasets')
        self.assertEqual(len(rendered), len(created))
Пример #9
0
 def test_quality_all(self):
     '''The quality report should expose the full set of expected keys.'''
     owner = UserFactory()
     visitor = UserFactory()
     dataset = DatasetFactory(owner=owner, frequency='weekly',
                              tags=['foo', 'bar'], description='a' * 42)
     dataset.add_resource(ResourceFactory(format='pdf'))
     message = MessageDiscussionFactory(posted_by=visitor)
     DiscussionFactory(subject=dataset, user=visitor, discussion=[message])

     expected_keys = [
         'description_length',
         'discussions',
         'frequency',
         'has_only_closed_or_no_formats',
         'has_resources',
         'has_unavailable_resources',
         'has_untreated_discussions',
         'score',
         'tags_count',
         'update_in'
     ]
     quality = dataset.quality
     assert quality['score'] == 0
     assert sorted(quality.keys()) == expected_keys
Пример #10
0
    def test_delete_home_dataset(self):
        '''A deleted dataset should be pulled from the site home datasets.'''
        current_site.settings.home_datasets = DatasetFactory.create_batch(3)
        current_site.save()

        # Flag the middle home dataset as deleted and persist it.
        removed = current_site.settings.home_datasets[1]
        removed.deleted = datetime.now()
        removed.save()

        current_site.reload()
        remaining_ids = [
            home.id for home in current_site.settings.home_datasets
        ]
        self.assertEqual(len(remaining_ids), 2)
        self.assertNotIn(removed.id, remaining_ids)
Пример #11
0
    def test_attach_does_not_duplicate(self):
        '''Attaching should not leave two datasets with the same remote id.'''
        # Datasets already carrying the domain and remote ids 0 and 1.
        already_attached = []
        for remote_id in range(2):
            existing = DatasetFactory.build()
            existing.extras['harvest:domain'] = 'test.org'
            existing.extras['harvest:remote_id'] = str(remote_id)
            existing.last_modified = datetime.now()
            existing.save()
            already_attached.append(existing)

        datasets = DatasetFactory.create_batch(3)
        rows = [
            {'local': str(dataset.id), 'remote': str(position)}
            for position, dataset in enumerate(datasets)
        ]

        with NamedTemporaryFile() as csvfile:
            # NOTE(review): byte-string delimiter/quotechar look Python 2
            # specific -- confirm before porting this test.
            writer = csv.DictWriter(csvfile,
                                    fieldnames=['local', 'remote'],
                                    delimiter=b';',
                                    quotechar=b'"')
            writer.writeheader()
            writer.writerows(rows)
            # Flush so the content can be read back through the file name.
            csvfile.flush()

            result = actions.attach('test.org', csvfile.name)

        remote_id_filter = {'extras__harvest:remote_id__exists': True}
        dbcount = Dataset.objects(**remote_id_filter).count()
        assert result.success == len(datasets)
        # Only the freshly attached datasets may still hold a remote id.
        assert dbcount == result.success
        for position, dataset in enumerate(datasets):
            dataset.reload()
            extras = dataset.extras
            assert extras['harvest:domain'] == 'test.org'
            assert extras['harvest:remote_id'] == str(position)
Пример #12
0
    def test_minimal(self):
        '''A dataset without URL should map to a blank-node DCAT dataset.'''
        dataset = DatasetFactory.build()  # Does not have an URL
        d = dataset_to_rdf(dataset)
        g = d.graph

        assert isinstance(d, RdfResource)
        # Compare by value: `is 1` only passes thanks to CPython small
        # integer caching and triggers a SyntaxWarning on Python 3.8+.
        assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

        assert g.value(d.identifier, RDF.type) == DCAT.Dataset

        # No URL on the dataset, so the identifier must be a blank node.
        assert isinstance(d.identifier, BNode)
        assert d.value(DCT.identifier) == Literal(dataset.id)
        assert d.value(DCT.title) == Literal(dataset.title)
        assert d.value(DCT.issued) == Literal(dataset.created_at)
        assert d.value(DCT.modified) == Literal(dataset.last_modified)
Пример #13
0
 def test_quality_default(self):
     '''An empty description should yield a quality holding only a 0 score.'''
     dataset = DatasetFactory(description='')
     expected = {'score': 0}
     assert dataset.quality == expected
Пример #14
0
 def test_next_update_empty(self):
     '''A factory-default dataset should have no next update date.'''
     fresh = DatasetFactory()
     assert fresh.next_update is None
Пример #15
0
 def test_quality_has_opened_formats(self):
     '''Mixed pdf/csv resources should not count as closed-only formats.'''
     dataset = DatasetFactory(description='')
     for fmt in ('pdf', 'csv'):
         dataset.add_resource(ResourceFactory(format=fmt))
     quality = dataset.quality
     assert not quality['has_only_closed_or_no_formats']
     assert quality['score'] == 4
Пример #16
0
 def test_quality_description_length(self):
     '''A 42-character description should score 0, a 420-character one 2.'''
     short = DatasetFactory(description='a' * 42)
     assert short.quality['description_length'] == 42
     assert short.quality['score'] == 0

     longer = DatasetFactory(description='a' * 420)
     assert longer.quality['score'] == 2
Пример #17
0
 def test_tags_normalized(self):
     '''Duplicate tags should collapse and tags should come back slugified.'''
     raw_tags = [' one another!', ' one another!', 'This IS a "tag"…']
     dataset = DatasetFactory(tags=raw_tags)
     normalized = dataset.tags
     assert len(normalized) == 2
     assert normalized[1] == 'this-is-a-tag'
Пример #18
0
 def test_quality_has_opened_formats(self):
     '''Mixed pdf/csv resources should not count as closed-only formats.'''
     dataset = DatasetFactory(description='')
     for fmt in ('pdf', 'csv'):
         dataset.add_resource(ResourceFactory(format=fmt))
     quality = dataset.quality
     assert not quality['has_only_closed_or_no_formats']
     assert quality['score'] == 4
Пример #19
0
 def process(self, item):
     '''Send `mock_process` for the item, then build a dataset titled after it.'''
     mock_process.send(self, item=item)
     title = 'dataset-{0}'.format(item.remote_id)
     return DatasetFactory.build(title=title)
Пример #20
0
 def setup(self, app):
     '''Create a resource, a dataset holding it and the link checker.'''
     resource = ResourceFactory()
     self.resource = resource
     self.dataset = DatasetFactory(resources=[resource])
     self.checker = CroquemortLinkChecker()
Пример #21
0
    def test_delete(self):
        '''It should delete the connected user.

        The account is anonymized and flagged as deleted, its follows and
        organization memberships are removed, its discussion messages are
        anonymized, and the content it owns is left untouched.
        '''
        user = self.login()
        self.assertIsNone(user.deleted)
        other_user = UserFactory()
        members = [Member(user=user), Member(user=other_user)]
        organization = OrganizationFactory(members=members)
        disc_msg_content = faker.sentence()
        disc_msg = DiscMsg(content=disc_msg_content, posted_by=user)
        other_disc_msg_content = faker.sentence()
        other_disc_msg = DiscMsg(content=other_disc_msg_content,
                                 posted_by=other_user)
        discussion = DiscussionFactory(user=user,
                                       discussion=[disc_msg, other_disc_msg])
        issue_msg_content = faker.sentence()
        issue_msg = IssueMsg(content=issue_msg_content, posted_by=user)
        other_issue_msg_content = faker.sentence()
        other_issue_msg = IssueMsg(content=other_issue_msg_content,
                                   posted_by=other_user)
        issue = IssueFactory(user=user,
                             discussion=[issue_msg, other_issue_msg])
        dataset = DatasetFactory(owner=user)
        reuse = ReuseFactory(owner=user)
        resource = CommunityResourceFactory(owner=user)
        activity = UserCreatedDataset.objects().create(actor=user,
                                                       related_to=dataset)

        following = Follow.objects().create(follower=user,
                                            following=other_user)
        followed = Follow.objects().create(follower=other_user, following=user)

        with self.capture_mails() as mails:
            response = self.delete(url_for('api.me'))
        self.assertEqual(len(mails), 1)
        self.assertEqual(mails[0].send_to, {user.email})
        self.assertEqual(mails[0].subject, _('Account deletion'))
        self.assert204(response)

        # Refresh every object that is expected to survive the deletion.
        user.reload()
        organization.reload()
        discussion.reload()
        issue.reload()
        dataset.reload()
        reuse.reload()
        resource.reload()
        activity.reload()

        # The following are deleted
        with self.assertRaises(Follow.DoesNotExist):
            following.reload()
        # The followers are deleted
        with self.assertRaises(Follow.DoesNotExist):
            followed.reload()

        # The personal data of the user are anonymized
        self.assertEqual(user.email, '{}@deleted'.format(user.id))
        self.assertIsNone(user.password)
        self.assertEqual(user.active, False)
        self.assertEqual(user.first_name, 'DELETED')
        self.assertEqual(user.last_name, 'DELETED')
        self.assertFalse(bool(user.avatar))
        self.assertIsNone(user.avatar_url)
        self.assertIsNone(user.website)
        self.assertIsNone(user.about)

        # The user is marked as deleted
        self.assertIsNotNone(user.deleted)

        # The user is removed from his organizations
        self.assertEqual(len(organization.members), 1)
        self.assertEqual(organization.members[0].user.id, other_user.id)

        # The discussions are kept but the messages are anonymized
        self.assertEqual(len(discussion.discussion), 2)
        self.assertEqual(discussion.discussion[0].content, 'DELETED')
        self.assertEqual(discussion.discussion[1].content,
                         other_disc_msg_content)

        # The issues are kept and the messages are not anonymized
        self.assertEqual(len(issue.discussion), 2)
        self.assertEqual(issue.discussion[0].content, issue_msg_content)
        self.assertEqual(issue.discussion[1].content, other_issue_msg_content)

        # The datasets are unchanged
        self.assertEqual(dataset.owner, user)

        # The reuses are unchanged
        self.assertEqual(reuse.owner, user)

        # The community resources are unchanged
        self.assertEqual(resource.owner, user)

        # The activities are unchanged
        self.assertEqual(activity.actor, user)
Пример #22
0
    def test_resources_metric(self, app):
        '''The site `resources` metric should count every dataset resource.'''
        batch_size, per_dataset = 3, 3
        DatasetFactory.create_batch(batch_size, nb_resources=per_dataset)

        site_id = app.config['SITE_ID']
        site = Site.objects.get(id=site_id)
        assert site.metrics['resources'] == batch_size * per_dataset
Пример #23
0
 def test_last_update_without_resource(self):
     '''Without resources, `last_update` should match `last_modified`.'''
     owner = UserFactory()
     dataset = DatasetFactory(owner=owner)
     actual, expected = dataset.last_update, dataset.last_modified
     assert_equal_dates(actual, expected)
Пример #24
0
 def test_last_update_with_resource(self):
     '''`last_update` should match the added resource publication date.'''
     owner = UserFactory()
     dataset = DatasetFactory(owner=owner)
     added = ResourceFactory()
     dataset.add_resource(added)
     assert_equal_dates(dataset.last_update, added.published)
Пример #25
0
 def test_url_is_stripped(self):
     '''Surrounding whitespace should be removed from a resource URL.'''
     padded_url = 'http://www.somewhere.com/with/spaces/   '
     resource = ResourceFactory(url=padded_url)
     dataset = DatasetFactory(resources=[resource])
     assert dataset.resources[0].url == padded_url.strip()
Пример #26
0
 def datasets(self):
     '''Return a batch of three datasets created through the factory.'''
     batch = DatasetFactory.create_batch(3)
     return batch
Пример #27
0
 def test_raise_404_if_private(self):
     '''Showing a private dataset should answer with a 404.'''
     hidden = DatasetFactory(private=True)
     show_url = url_for('datasets.show', dataset=hidden)
     response = self.get(show_url)
     self.assert404(response)
Пример #28
0
 def test_bad_url(self):
     '''A resource with a malformed URL should be rejected at validation.'''
     with pytest.raises(db.ValidationError):
         # Both factories stay inside the block, whichever one raises.
         invalid = ResourceFactory(url='not-an-url')
         DatasetFactory(resources=[invalid])
Пример #29
0
 def test_raise_410_if_deleted(self):
     '''Showing a deleted dataset should answer with a 410.'''
     gone = DatasetFactory(deleted=datetime.now())
     show_url = url_for('datasets.show', dataset=gone)
     response = self.get(show_url)
     self.assert410(response)
Пример #30
0
 def test_legacy_frequencies(self):
     '''Each legacy frequency should come back as its mapped value.'''
     for legacy, current in LEGACY_FREQUENCIES.items():
         dataset = DatasetFactory(frequency=legacy)
         assert dataset.frequency == current
Пример #31
0
 def test_send_on_delete(self):
     '''Saving a dataset flagged as deleted should emit `on_delete`.'''
     doomed = DatasetFactory()
     with assert_emit(Dataset.on_delete):
         doomed.deleted = datetime.now()
         doomed.save()
Пример #32
0
 def test_quality_has_undefined_and_closed_format(self):
     '''Resources with no format or xls only should be flagged as closed.'''
     dataset = DatasetFactory(description='')
     for fmt in (None, 'xls'):
         dataset.add_resource(ResourceFactory(format=fmt))
     quality = dataset.quality
     assert quality['has_only_closed_or_no_formats']
     assert quality['score'] == 0
Пример #33
0
 def test_community_resource_deleted_dataset(self):
     '''Deleting the dataset should unset it on its community resource.'''
     parent = DatasetFactory()
     community_resource = CommunityResourceFactory(dataset=parent)
     # Delete through the resource's own reference, then refresh it.
     community_resource.dataset.delete()
     community_resource.reload()
     assert community_resource.dataset is None
Пример #34
0
 def test_quality_has_undefined_and_closed_format(self):
     '''Resources with no format or xls only should be flagged as closed.'''
     dataset = DatasetFactory(description='')
     for fmt in (None, 'xls'):
         dataset.add_resource(ResourceFactory(format=fmt))
     quality = dataset.quality
     assert quality['has_only_closed_or_no_formats']
     assert quality['score'] == 0
Пример #35
0
 def setUp(self):
     '''Create a resource and a dataset holding it for each test.'''
     resource = ResourceFactory()
     self.resource = resource
     self.dataset = DatasetFactory(resources=[resource])
Пример #36
0
 def test_send_on_delete(self):
     '''Saving a dataset flagged as deleted should emit `on_delete`.'''
     doomed = DatasetFactory()
     with assert_emit(Dataset.on_delete):
         doomed.deleted = datetime.now()
         doomed.save()
def test_display_no_preview_for_no_resource_extra():
    '''No preview URL is expected when only the dataset carries the extra.'''
    plain_resource = ResourceFactory()
    dataset_extras = {'geop:dataset_id': 'DID'}
    DatasetFactory(resources=[plain_resource], extras=dataset_extras)
    assert plain_resource.preview_url is None
Пример #38
0
 def test_quality_next_update(self):
     '''A weekly dataset should report its frequency and update delay.'''
     dataset = DatasetFactory(description='', frequency='weekly')
     quality = dataset.quality
     assert quality['update_in'] == -6
     assert quality['frequency'] == 'weekly'
     assert quality['score'] == 2
def test_display_no_preview_for_no_dataset_extra():
    '''No preview URL is expected when only the resource carries the extra.'''
    resource_extras = {
        'geop:resource_id': 'RID',
    }
    tagged_resource = ResourceFactory(extras=resource_extras)
    DatasetFactory(resources=[tagged_resource])
    assert tagged_resource.preview_url is None
Пример #40
0
 def test_next_update_weekly(self):
     '''A weekly dataset should be due for update seven days from now.'''
     dataset = DatasetFactory(frequency='weekly')
     actual = dataset.next_update
     expected = datetime.now() + timedelta(days=7)
     assert_equal_dates(actual, expected)
Пример #41
0
 def test_ftp_url(self, httpretty):
     '''Checking a resource with an FTP URL should return None.'''
     ftp_resource = ResourceFactory(url='Ftp://etalab.gouv.fr')
     DatasetFactory(resources=[ftp_resource])
     outcome = self.checker.check(ftp_resource)
     assert outcome is None
Пример #42
0
 def test_last_update_with_resource(self):
     '''`last_update` should match the added resource publication date.'''
     owner = UserFactory()
     dataset = DatasetFactory(owner=owner)
     added = ResourceFactory()
     dataset.add_resource(added)
     assert_equal_dates(dataset.last_update, added.published)
Пример #43
0
    def test_croquemort_not_configured(self, app):
        '''The checker should return None for the dataset first resource.'''
        # NOTE(review): per the test name, croquemort is presumably left
        # unconfigured on `app` -- confirm against the fixture.
        dataset = DatasetFactory(visible=True)
        checker = CroquemortLinkChecker()
        first_resource = dataset.resources[0]

        assert checker.check(first_resource) is None
Пример #44
0
 def test_200_if_deleted_but_authorized(self):
     '''It should not raise a 410 if the user can view it'''
     self.login()
     owned = DatasetFactory(deleted=datetime.now(), owner=self.user)
     show_url = url_for('datasets.show', dataset=owned)
     response = self.get(show_url)
     self.assert200(response)
Пример #45
0
 def test_url_is_required(self):
     '''A resource without URL should be rejected at validation.'''
     with pytest.raises(db.ValidationError):
         # Both factories stay inside the block, whichever one raises.
         missing_url = ResourceFactory(url=None)
         DatasetFactory(resources=[missing_url])
Пример #46
0
    def test_json_ld(self):
        '''It should render a json-ld markup into the dataset page'''
        resource = ResourceFactory(format='png',
                                   description='* Title 1\n* Title 2',
                                   metrics={'views': 10})
        license = LicenseFactory(url='http://www.datagouv.fr/licence')
        dataset = DatasetFactory(license=license,
                                 tags=['foo', 'bar'],
                                 resources=[resource],
                                 description='a&éèëù$£',
                                 owner=UserFactory(),
                                 extras={'foo': 'bar'})
        community_resource = CommunityResourceFactory(
            dataset=dataset,
            format='csv',
            description='* Title 1\n* Title 2',
            metrics={'views': 42})

        url = url_for('datasets.show', dataset=dataset)
        response = self.get(url)
        self.assert200(response)
        json_ld = self.get_json_ld(response)
        # `assertEqual` is used throughout: the `assertEquals` alias is
        # deprecated and no longer exists on Python 3.12+.
        self.assertEqual(json_ld['@context'], 'http://schema.org')
        self.assertEqual(json_ld['@type'], 'Dataset')
        self.assertEqual(json_ld['@id'], str(dataset.id))
        self.assertEqual(json_ld['description'], 'a&éèëù$£')
        self.assertEqual(json_ld['alternateName'], dataset.slug)
        # Dates are compared on their first 16 ISO characters only,
        # i.e. down to the minute.
        self.assertEqual(json_ld['dateCreated'][:16],
                         dataset.created_at.isoformat()[:16])
        self.assertEqual(json_ld['dateModified'][:16],
                         dataset.last_modified.isoformat()[:16])
        self.assertEqual(json_ld['url'], 'http://localhost{}'.format(url))
        self.assertEqual(json_ld['name'], dataset.title)
        self.assertEqual(json_ld['keywords'], 'bar,foo')
        self.assertEqual(len(json_ld['distribution']), 1)

        # The dataset's own resource is rendered under `distribution`.
        json_ld_resource = json_ld['distribution'][0]
        self.assertEqual(json_ld_resource['@type'], 'DataDownload')
        self.assertEqual(json_ld_resource['@id'], str(resource.id))
        self.assertEqual(json_ld_resource['url'], resource.latest)
        self.assertEqual(json_ld_resource['name'], resource.title)
        self.assertEqual(json_ld_resource['contentUrl'], resource.url)
        self.assertEqual(json_ld_resource['dateCreated'][:16],
                         resource.created_at.isoformat()[:16])
        self.assertEqual(json_ld_resource['dateModified'][:16],
                         resource.modified.isoformat()[:16])
        self.assertEqual(json_ld_resource['datePublished'][:16],
                         resource.published.isoformat()[:16])
        self.assertEqual(json_ld_resource['encodingFormat'], 'png')
        self.assertEqual(json_ld_resource['contentSize'], resource.filesize)
        self.assertEqual(json_ld_resource['fileFormat'], resource.mime)
        self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
        self.assertEqual(
            json_ld_resource['interactionStatistic'], {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': 10,
            })

        # The community resource is rendered under
        # `contributedDistribution`.
        self.assertEqual(len(json_ld['contributedDistribution']), 1)
        json_ld_resource = json_ld['contributedDistribution'][0]
        self.assertEqual(json_ld_resource['@type'], 'DataDownload')
        self.assertEqual(json_ld_resource['@id'], str(community_resource.id))
        self.assertEqual(json_ld_resource['url'], community_resource.latest)
        self.assertEqual(json_ld_resource['name'], community_resource.title)
        self.assertEqual(json_ld_resource['contentUrl'],
                         community_resource.url)
        self.assertEqual(json_ld_resource['dateCreated'][:16],
                         community_resource.created_at.isoformat()[:16])
        self.assertEqual(json_ld_resource['dateModified'][:16],
                         community_resource.modified.isoformat()[:16])
        self.assertEqual(json_ld_resource['datePublished'][:16],
                         community_resource.published.isoformat()[:16])
        self.assertEqual(json_ld_resource['encodingFormat'],
                         community_resource.format)
        self.assertEqual(json_ld_resource['contentSize'],
                         community_resource.filesize)
        self.assertEqual(json_ld_resource['fileFormat'],
                         community_resource.mime)
        self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
        self.assertEqual(
            json_ld_resource['interactionStatistic'], {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': 42,
            })

        self.assertEqual(json_ld['extras'], [{
            '@type': 'http://schema.org/PropertyValue',
            'name': 'foo',
            'value': 'bar',
        }])
        self.assertEqual(json_ld['license'], 'http://www.datagouv.fr/licence')
        self.assertEqual(json_ld['author']['@type'], 'Person')
Пример #47
0
 def test_redirect_datasets(self, client):
     '''`/en/dataset/<slug>/` should redirect to `datasets.show`.'''
     dataset = DatasetFactory()
     requested_path = '/en/dataset/%s/' % dataset.slug
     response = client.get(requested_path)
     target = url_for('datasets.show', dataset=dataset)
     assert_redirects(response, target)