def test_first_file(self):
     """A name passed to a fresh deduplicator is returned unchanged."""
     name = 'index.html'
     deduplicator = FilenameDeduplicator()
     result = deduplicator.deduplicate(name)
     assert_equals(result, name)
 def test_reset(self):
     """The same name is returned unchanged both before and after reset()."""
     name = 'index.html'
     deduplicator = FilenameDeduplicator()
     # First sighting of the name.
     assert_equals(deduplicator.deduplicate(name), name)
     # After reset() the same name must again come back as-is.
     deduplicator.reset()
     assert_equals(deduplicator.deduplicate(name), name)
 def test_unicode_filename(self):
     """A repeated non-ASCII name gets a '1' appended on its second use."""
     deduplicator = FilenameDeduplicator()
     # Same input twice; the expected outputs are listed in call order.
     for expected in (u'\xa3', u'\xa31'):
         assert_equals(deduplicator.deduplicate(u'\xa3'), expected)
 def test_second_file(self):
     """Two distinct names are each returned unchanged."""
     deduplicator = FilenameDeduplicator()
     for name in ('index.html', 'robots.txt'):
         assert_equals(deduplicator.deduplicate(name), name)
 def test_no_extension(self):
     """A repeated extension-less name gets '1' appended the second time."""
     deduplicator = FilenameDeduplicator()
     first = deduplicator.deduplicate('index')
     assert_equals(first, 'index')
     second = deduplicator.deduplicate('index')
     assert_equals(second, 'index1')
 def test_second_file(self):
     """Different names fed to one deduplicator both come back as given."""
     dedup = FilenameDeduplicator()
     first_result = dedup.deduplicate('index.html')
     assert_equals(first_result, 'index.html')
     second_result = dedup.deduplicate('robots.txt')
     assert_equals(second_result, 'robots.txt')
Пример #7
0
    fd = FilenameDeduplicator()
    for res in pkg['resources']:
        archival = Archival.get_for_resource(res['id'])
        if archival and archival.cache_filepath:
            # We have archived it, and we have a path.
            _, resource_id, filename = archival.cache_filepath.rsplit('/', 2)
            cache_filepath = archival.cache_filepath
        else:
            # Try and work out the filename from the URL.
            try:
                _, filename = res['url'].rsplit('/', 1)
            except ValueError:
                filename = res['id']
            cache_filepath = ''

        filename = fd.deduplicate(filename)
        resource_json = {
            'url': res['url'],
            'path': u'data/{0}'.format(filename),
            'cache_filepath': cache_filepath,
            'description': res['description']
        }
        resource_json['has_data'], resource_json['detected_format'] = \
            resource_has_data(res)

        # If we have archived the data, but the link was broken
        # then record the reason.
        if archival and archival.is_broken:
            resource_json['reason'] = archival.reason

        format = datapackage_format(res['format'])
 def test_first_file(self):
     """The first name given to a new deduplicator comes back as given."""
     dedup = FilenameDeduplicator()
     returned = dedup.deduplicate('index.html')
     assert_equals(returned, 'index.html')
 def test_unicode_filename(self):
     """Non-ASCII names: unchanged on first use, '1' appended on second."""
     dedup = FilenameDeduplicator()
     first = dedup.deduplicate(u'\xa3')
     assert_equals(first, u'\xa3')
     second = dedup.deduplicate(u'\xa3')
     assert_equals(second, u'\xa31')
 def test_reset(self):
     """A name used before reset() is returned unchanged again afterwards."""
     dedup = FilenameDeduplicator()
     before = dedup.deduplicate('index.html')
     assert_equals(before, 'index.html')
     dedup.reset()
     after = dedup.deduplicate('index.html')
     assert_equals(after, 'index.html')
 def test_no_extension(self):
     """Second use of a name without an extension yields the name plus '1'."""
     dedup = FilenameDeduplicator()
     # Same input twice; the expected outputs are listed in call order.
     for expected in ('index', 'index1'):
         assert_equals(dedup.deduplicate('index'), expected)
Пример #12
0
    fd = FilenameDeduplicator()
    for res in pkg["resources"]:
        archival = Archival.get_for_resource(res["id"])
        if archival and archival.cache_filepath:
            # We have archived it, and we have a path.
            _, resource_id, filename = archival.cache_filepath.rsplit("/", 2)
            cache_filepath = archival.cache_filepath
        else:
            # Try and work out the filename from the URL.
            try:
                _, filename = res["url"].rsplit("/", 1)
            except ValueError:
                filename = res["id"]
            cache_filepath = ""

        filename = fd.deduplicate(filename)
        resource_json = {
            "url": res["url"],
            "path": u"data/{0}".format(filename),
            "cache_filepath": cache_filepath,
            "description": res["description"],
        }
        resource_json["has_data"], resource_json["detected_format"] = resource_has_data(res)

        # If we have archived the data, but the link was broken
        # then record the reason.
        if archival and archival.is_broken:
            resource_json["reason"] = archival.reason

        format = datapackage_format(res["format"])
        if format: