示例#1
0
 def test_match_by_alternate_title_with_multiple_candidates_from_one_licence(
         self):
     """Several near-identical alternate titles on one licence still
     resolve to that licence."""
     lic = LicenseFactory(
         alternate_titles=['Licence Ouverte v2', 'Licence Ouverte v2.0'])
     guessed = License.guess('Licence Ouverte v2.0')
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#2
0
 def test_prioritize_title_over_alternate_title(self):
     """A licence's real title wins over another licence's alternate title."""
     shared = faker.sentence()
     by_title = LicenseFactory(title=shared)
     LicenseFactory(alternate_titles=[shared])
     guessed = License.guess(shared)
     assert isinstance(guessed, License)
     assert guessed.id == by_title.id
 def test_prioritize_title_over_alternate_title(self):
     """A licence's real title wins over another licence's alternate title.

     NOTE(review): this method name is duplicated in the original file;
     the later definition shadows the earlier one at class-creation time.
     """
     wanted_title = faker.sentence()
     expected = LicenseFactory(title=wanted_title)
     LicenseFactory(alternate_titles=[wanted_title])
     result = License.guess(wanted_title)
     assert isinstance(result, License)
     assert result.id == expected.id
示例#4
0
    def process(self, item):
        """Map a harvested SNIG (snig.dgterritorio.gov.pt) item onto a Dataset.

        Fetches (or creates) the dataset matching ``item.remote_id``,
        fills its metadata from the remote payload and rebuilds its
        resource list from scratch.

        :param item: harvest job item; ``item.kwargs['item']`` holds the
            remote payload (title, description, date, keywords, resources).
        :return: the populated dataset (persistence handled by the caller).
        """
        dataset = self.get_dataset(item.remote_id)

        # Keep a distinct name for the remote payload instead of rebinding
        # `item` (the original shadowed the harvest item mid-function).
        payload = item.kwargs['item']

        dataset.title = payload['title']
        dataset.license = License.guess('cc-by')
        dataset.tags = ["snig.dgterritorio.gov.pt"]
        dataset.description = payload['description']

        if payload.get('date'):
            dataset.created_at = payload['date']

        # `keywords` may be absent or None: iterate a safe default instead
        # of crashing on `for ... in None`.
        for keyword in payload.get('keywords') or []:
            dataset.tags.append(keyword)

        # Force recreation of all resources
        dataset.resources = []

        for resource in payload.get('resources') or []:
            # Prefer the OGC `service` query parameter as the format;
            # fall back to the URL's file extension.
            parsed = urlparse.urlparse(resource['url'])
            try:
                fmt = str(urlparse.parse_qs(parsed.query)['service'][0])
            except KeyError:
                fmt = resource['url'].split('.')[-1]

            # NOTE(review): resources reuse the dataset title — presumably
            # the remote payload has no per-resource title; confirm.
            new_resource = Resource(title=payload['title'],
                                    url=resource['url'],
                                    filetype='remote',
                                    format=fmt)

            dataset.resources.append(new_resource)

        dataset.extras['harvest:name'] = self.source.name

        return dataset
示例#5
0
    def process(self, item):
        """Map a harvested APA (apambiente.pt) item onto a Dataset.

        Fills dataset metadata from ``item.kwargs`` and rebuilds the
        resource list from the payload's ``links``: 'details' links are
        appended to the description, 'open' links become remote resources.

        :param item: harvest job item; ``item.kwargs`` carries ``title``,
            ``date`` and the raw payload under ``item``.
        :return: the populated dataset (persistence handled by the caller).
        """
        dataset = self.get_dataset(item.remote_id)

        kwargs = item.kwargs
        dataset.title = kwargs['title']
        dataset.license = License.guess('cc-by')
        dataset.tags = ["apambiente.pt"]
        payload = kwargs['item']

        dataset.description = payload['summary']

        if kwargs['date']:
            dataset.created_at = kwargs['date']

        # Force recreation of all resources
        dataset.resources = []
        for link in payload['links']:
            # Strip stray backslashes and percent-encode spaces.
            url = link['href'].replace('\\', '').replace(' ', '%20')
            # Renamed from `type`: do not shadow the builtin.
            link_type = link['type']

            if link_type == 'details':
                dataset.description += "<br>"
                dataset.description += "<br>Mais detalhes : <a href=\"%s\" target=\"_blank\">%s</a>" % (
                    url, dataset.title)

            if link_type == 'open':
                # Use the URL path extension as format, defaulting to WMS
                # when the path has no extension.
                url_parts = list(urlparse.urlparse(url))
                parts = url_parts[2].split('.')
                fmt = parts[-1] if len(parts) > 1 else 'wms'
                new_resource = Resource(title=dataset.title,
                                        url=url,
                                        filetype='remote',
                                        format=fmt.lower())
                dataset.resources.append(new_resource)

        return dataset
示例#6
0
    def process(self, item):
        """Map a harvested APA (apambiente.pt) item onto a single-resource Dataset.

        Fills dataset metadata from ``item.kwargs`` and attaches exactly
        one remote resource whose format is derived from the payload.

        :param item: harvest job item; ``item.kwargs`` carries ``title``,
            ``date`` and the raw payload under ``item``.
        :return: the populated dataset (persistence handled by the caller).
        """
        dataset = self.get_dataset(item.remote_id)

        kwargs = item.kwargs
        dataset.title = kwargs['title']
        dataset.license = License.guess('cc-by')
        dataset.tags = ["apambiente.pt"]
        payload = kwargs['item']

        dataset.description = payload.get('description')

        if kwargs['date']:
            dataset.created_at = kwargs['date']

        # Force recreation of all resources
        dataset.resources = []

        url = payload.get('url')

        # "liveData" entries are served as WMS; otherwise take the URL
        # extension, falling back to WMS when the trailing segment does
        # not look like a file extension (longer than 3 characters).
        # Renamed from `type`: do not shadow the builtin.
        if payload.get('type') == "liveData":
            fmt = "wms"
        else:
            fmt = url.split('.')[-1].lower()
            if len(fmt) > 3:
                fmt = "wms"

        new_resource = Resource(title=dataset.title,
                                url=url,
                                filetype='remote',
                                format=fmt)
        dataset.resources.append(new_resource)

        return dataset
示例#7
0
File: odspt.py  Project: amagovpt/gouvpt
    def process(self, item):
        """Convert a pre-fetched OpenDataSoft dataset payload into a Dataset.

        Skips record-less datasets and INSPIRE datasets (unless the
        'inspire' feature is enabled), maps metadata and tags, auto-creates
        the publisher Organization when unknown, guesses the license and
        attaches exportable resources and extra files.

        :param item: harvest item; ``item.kwargs['dataset']`` is the raw
            ODS dataset payload.
        :raises HarvestSkipException: when the dataset must be skipped.
        :return: the populated dataset (persistence handled by the caller).
        """
        ods_dataset = item.kwargs['dataset']
        dataset_id = ods_dataset['datasetid']
        ods_metadata = ods_dataset['metas']
        ods_interopmetas = ods_dataset.get('interop_metas', {})

        # Nothing to harvest when the remote dataset holds no records.
        if not ods_dataset.get('has_records'):
            msg = 'Dataset {datasetid} has no record'.format(**ods_dataset)
            raise HarvestSkipException(msg)

        # INSPIRE datasets are only harvested when the feature is enabled.
        if 'inspire' in ods_interopmetas and not self.has_feature('inspire'):
            msg = 'Dataset {datasetid} has INSPIRE metadata'
            raise HarvestSkipException(msg.format(**ods_dataset))

        dataset = self.get_dataset(item.remote_id)

        dataset.title = ods_metadata['title']
        dataset.frequency = 'unknown'
        description = ods_metadata.get('description', '').strip()
        dataset.description = parse_html(description)
        dataset.private = False

        # Detect Organization
        # Look up the ODS `publisher` acronym; when no Organization matches,
        # create a minimal one on the fly (acronym used for all fields).
        try:
            organization_acronym = ods_metadata['publisher']
        except KeyError:
            pass
        else:
            orgObj = Organization.objects(acronym=organization_acronym).first()
            if orgObj:
                dataset.organization = orgObj
            else:
                orgObj = Organization()
                orgObj.acronym = organization_acronym
                orgObj.name = organization_acronym
                orgObj.description = organization_acronym
                orgObj.save()

                dataset.organization = orgObj

        # Merge `keyword` and `theme` metadata into a deduplicated tag set;
        # both fields may be either a single string or a list of strings.
        tags = set()
        if 'keyword' in ods_metadata:
            if isinstance(ods_metadata['keyword'], list):
                tags |= set(ods_metadata['keyword'])
            else:
                tags.add(ods_metadata['keyword'])

        if 'theme' in ods_metadata:
            if isinstance(ods_metadata['theme'], list):
                for theme in ods_metadata['theme']:
                    tags.update([t.strip().lower() for t in theme.split(',')])
            else:
                themes = ods_metadata['theme'].split(',')
                tags.update([t.strip().lower() for t in themes])

        dataset.tags = list(tags)
        # Also tag with the harvest source hostname for traceability.
        dataset.tags.append(urlparse(self.source.url).hostname)

        # Detect license
        default_license = dataset.license or License.default()
        license_id = ods_metadata.get('license')
        dataset.license = License.guess(license_id,
                                        self.LICENSES.get(license_id),
                                        default=default_license)

        self.process_resources(dataset, ods_dataset, ('csv', 'json'))

        # Geo datasets also expose GeoJSON and, when small enough, SHP exports.
        if 'geo' in ods_dataset['features']:
            exports = ['geojson']
            if ods_metadata['records_count'] <= self.SHAPEFILE_RECORDS_LIMIT:
                exports.append('shp')
            self.process_resources(dataset, ods_dataset, exports)

        self.process_extra_files(dataset, ods_dataset, 'alternative_export')
        self.process_extra_files(dataset, ods_dataset, 'attachment')

        dataset.extras['ods:url'] = self.explore_url(dataset_id)
        dataset.extras['harvest:name'] = self.source.name

        if 'references' in ods_metadata:
            dataset.extras['ods:references'] = ods_metadata['references']
        dataset.extras['ods:has_records'] = ods_dataset['has_records']
        dataset.extras['ods:geo'] = 'geo' in ods_dataset['features']

        return dataset
示例#8
0
 def test_exact_match_by_title(self):
     """Guessing an exact title returns the matching License."""
     lic = LicenseFactory()
     guessed = License.guess(lic.title)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#9
0
 def test_not_found(self):
     """An unknown string yields no license."""
     self.assertIsNone(License.guess('should not be found'))
示例#10
0
 def test_not_found_with_default(self):
     """When nothing matches, the provided default is returned."""
     fallback = LicenseFactory()
     guessed = License.guess('should not be found', default=fallback)
     assert guessed.id == fallback.id
 def test_match_by_alternate_title_with_extra_inner_space(self):
     """Doubled inner whitespace does not break an alternate-title match."""
     lic = LicenseFactory(alternate_titles=['License ODBl'])
     guessed = License.guess('License  ODBl')  # double space in the query
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
 def test_exact_match_by_alternate_title_with_spaces(self):
     """Surrounding whitespace is ignored when matching an alternate title."""
     alt = faker.sentence()
     lic = LicenseFactory(alternate_titles=[alt])
     guessed = License.guess(' {0} '.format(alt))
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
 def test_exact_match_by_title(self):
     """Guessing an exact title returns the matching License.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory()
     result = License.guess(expected.title)
     assert isinstance(result, License)
     assert result.id == expected.id
示例#14
0
 def test_multiple_strings(self):
     """With several candidates, a non-matching first string is skipped."""
     lic = LicenseFactory()
     guessed = License.guess('should not match', lic.id)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#15
0
 def test_empty_string(self):
     """An empty string matches nothing."""
     assert License.guess('') is None
示例#16
0
 def test_match_by_alternate_title_with_mismatching_case(self):
     """Alternate-title matching tolerates a case mismatch."""
     lic = LicenseFactory(alternate_titles=['License ODBl'])
     guessed = License.guess('License ODBL')
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#17
0
 def test_match_by_alternate_title_with_extra_inner_space(self):
     """Doubled inner whitespace does not break an alternate-title match.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory(alternate_titles=['License ODBl'])
     result = License.guess('License  ODBl')  # double space in the query
     assert isinstance(result, License)
     assert result.id == expected.id
示例#18
0
 def test_match_by_alternate_title_with_low_edit_distance(self):
     """A one-character difference still matches an alternate title."""
     lic = LicenseFactory(alternate_titles=['License'])
     guessed = License.guess('Licence')
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#19
0
 def test_exact_match_by_alternate_title_with_spaces(self):
     """Surrounding whitespace is ignored when matching an alternate title.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     wanted = faker.sentence()
     expected = LicenseFactory(alternate_titles=[wanted])
     result = License.guess(' {0} '.format(wanted))
     assert isinstance(result, License)
     assert result.id == expected.id
示例#20
0
 def test_exact_match_by_title_with_spaces(self):
     """Leading/trailing whitespace around a title is ignored."""
     lic = LicenseFactory()
     guessed = License.guess(' {0} '.format(lic.title))
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#21
0
 def test_not_found(self):
     """An unknown string yields no license."""
     assert License.guess('should not be found') is None
 def test_imatch_by_alternate_url(self):
     """An alternate URL containing uppercase characters still matches."""
     caps_url = '%s/CAPS.php' % faker.uri()
     lic = LicenseFactory(alternate_urls=[caps_url])
     guessed = License.guess(caps_url)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#23
0
 def test_none(self):
     """A None input yields no license."""
     assert License.guess(None) is None
 def test_exact_match_by_title_with_spaces(self):
     """Leading/trailing whitespace around a title is ignored.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory()
     result = License.guess(' {0} '.format(expected.title))
     assert isinstance(result, License)
     assert result.id == expected.id
示例#25
0
 def test_none(self):
     """A None input yields no license."""
     self.assertIsNone(License.guess(None))
 def test_match_by_alternate_title_with_low_edit_distance(self):
     """A one-character difference still matches an alternate title.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory(alternate_titles=['License'])
     result = License.guess('Licence')
     assert isinstance(result, License)
     assert result.id == expected.id
示例#27
0
 def test_exact_match_by_title(self):
     """Guessing an exact title returns the matching License."""
     lic = LicenseFactory()
     guessed = License.guess(lic.title)
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
 def test_match_by_alternate_title_with_mismatching_case(self):
     """Alternate-title matching tolerates a case mismatch.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory(alternate_titles=['License ODBl'])
     result = License.guess('License ODBL')
     assert isinstance(result, License)
     assert result.id == expected.id
示例#29
0
 def test_match_by_title_with_low_edit_distance(self):
     """A one-character difference still matches the title."""
     lic = LicenseFactory(title='License')
     guessed = License.guess('Licence')
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
 def test_multiple_strings(self):
     """With several candidates, a non-matching first string is skipped.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory()
     result = License.guess('should not match', expected.id)
     assert isinstance(result, License)
     assert result.id == expected.id
示例#31
0
 def test_match_by_title_with_mismatching_case(self):
     """Title matching tolerates a case mismatch."""
     lic = LicenseFactory(title='License ODBl')
     guessed = License.guess('License ODBL')
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
示例#32
0
 def test_not_found_with_default(self):
     """When nothing matches, the provided default is returned."""
     fallback = LicenseFactory()
     guessed = License.guess('should not be found', default=fallback)
     self.assertEqual(guessed.id, fallback.id)
示例#33
0
 def test_exact_match_by_alternate_url(self):
     """An exact alternate URL resolves to its license."""
     alt_url = faker.uri()
     lic = LicenseFactory(alternate_urls=[alt_url])
     guessed = License.guess(alt_url)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#34
0
 def test_empty_string(self):
     """An empty string matches nothing."""
     self.assertIsNone(License.guess(''))
 def test_not_found_with_default(self):
     """When nothing matches, the provided default is returned.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     expected = LicenseFactory()
     result = License.guess('should not be found', default=expected)
     assert result.id == expected.id
示例#36
0
 def test_exact_match_by_title_with_spaces(self):
     """Leading/trailing whitespace around a title is ignored."""
     lic = LicenseFactory()
     guessed = License.guess(' {0} '.format(lic.title))
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
 def test_none(self):
     """A None input yields no license.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     result = License.guess(None)
     assert result is None
示例#38
0
 def test_match_by_title_with_extra_inner_space(self):
     """Doubled inner whitespace does not break a title match."""
     lic = LicenseFactory(title='License ODBl')
     guessed = License.guess('License  ODBl')  # double space in the query
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
 def test_empty_string(self):
     """An empty string matches nothing.

     NOTE(review): duplicate name in the original file; the later
     definition shadows the earlier one at class-creation time.
     """
     result = License.guess('')
     assert result is None
示例#40
0
 def test_multiple_strings(self):
     """With several candidates, a non-matching first string is skipped."""
     lic = LicenseFactory()
     guessed = License.guess('should not match', lic.id)
     self.assertIsInstance(guessed, License)
     self.assertEqual(lic.id, guessed.id)
 def test_imatch_by_id(self):
     """An id containing uppercase characters still matches."""
     lic = LicenseFactory(id='CAPS-ID')
     guessed = License.guess(lic.id)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id
示例#42
0
    def process(self, item):
        """Fetch an OpenDataSoft dataset from its API and map it to a Dataset.

        Retrieves the payload (including interoperability metadata), skips
        datasets with neither records nor downloadable files and INSPIRE
        datasets (unless the 'inspire' feature is enabled), then maps
        metadata, tags, license and exportable resources.

        :param item: harvest item; ``item.remote_id`` is the ODS dataset id.
        :raises HarvestSkipException: when the dataset must be skipped.
        :return: the populated dataset (persistence handled by the caller).
        """
        dataset_id = item.remote_id
        response = self.get(self.api_dataset_url(dataset_id),
                            params={'interopmetas': 'true'})
        response.raise_for_status()
        ods_dataset = response.json()
        ods_metadata = ods_dataset['metas']
        ods_interopmetas = ods_dataset.get('interop_metas', {})

        # Skip datasets exposing neither records nor any downloadable file.
        if not any((ods_dataset.get(attr) for attr
                    in ('has_records', 'attachments', 'alternative_exports'))):
            msg = 'Dataset {datasetid} has no record'.format(**ods_dataset)
            raise HarvestSkipException(msg)

        # INSPIRE datasets are only harvested when the feature is enabled.
        if 'inspire' in ods_interopmetas and not self.has_feature('inspire'):
            msg = 'Dataset {datasetid} has INSPIRE metadata'
            raise HarvestSkipException(msg.format(**ods_dataset))

        dataset = self.get_dataset(item.remote_id)

        dataset.title = ods_metadata['title']
        dataset.frequency = 'unknown'
        description = ods_metadata.get('description', '').strip()
        dataset.description = parse_html(description)
        dataset.private = False
        dataset.last_modified = ods_metadata['modified']

        # Merge `keyword` and `theme` metadata into a deduplicated tag set;
        # both fields may be either a single string or a list of strings.
        tags = set()
        if 'keyword' in ods_metadata:
            if isinstance(ods_metadata['keyword'], list):
                tags |= set(ods_metadata['keyword'])
            else:
                tags.add(ods_metadata['keyword'])

        if 'theme' in ods_metadata:
            if isinstance(ods_metadata['theme'], list):
                for theme in ods_metadata['theme']:
                    tags.update([t.strip().lower() for t in theme.split(',')])
            else:
                themes = ods_metadata['theme'].split(',')
                tags.update([t.strip().lower() for t in themes])

        dataset.tags = list(tags)

        # Detect license
        default_license = dataset.license or License.default()
        license_id = ods_metadata.get('license')
        dataset.license = License.guess(license_id,
                                        self.LICENSES.get(license_id),
                                        default=default_license)

        self.process_resources(dataset, ods_dataset, ('csv', 'json'))

        # Geo datasets also expose GeoJSON and, when small enough, SHP exports.
        if 'geo' in ods_dataset['features']:
            exports = ['geojson']
            if ods_metadata['records_count'] <= self.SHAPEFILE_RECORDS_LIMIT:
                exports.append('shp')
            self.process_resources(dataset, ods_dataset, exports)

        self.process_extra_files(dataset, ods_dataset, 'alternative_export')
        self.process_extra_files(dataset, ods_dataset, 'attachment')

        dataset.extras['ods:url'] = self.explore_url(dataset_id)
        dataset.extras['remote_url'] = self.explore_url(dataset_id)
        if 'references' in ods_metadata:
            dataset.extras['ods:references'] = ods_metadata['references']
        dataset.extras['ods:has_records'] = ods_dataset['has_records']
        dataset.extras['ods:geo'] = 'geo' in ods_dataset['features']

        return dataset
 def test_imatch_by_url(self):
     """A license URL containing uppercase characters still matches."""
     caps_url = '%s/CAPS.php' % faker.uri()
     lic = LicenseFactory(url=caps_url)
     guessed = License.guess(lic.url)
     assert isinstance(guessed, License)
     assert guessed.id == lic.id