示例#1
0
    def process_enclosure(self, dataset, enclosure):
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            hash = enclosure.hash
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict([s.partition(':')[::2] for s in hash.split()])
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except AttributeError:
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url, location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
示例#2
0
    def process_enclosure(self, dataset, enclosure):
        filename = getattr(enclosure, 'title', basename(enclosure.href))
        datafile = Dataset_File(filename=filename, dataset=dataset)
        try:
            datafile.mimetype = enclosure.mime
        except AttributeError:
            pass
        try:
            datafile.size = enclosure.length
        except AttributeError:
            pass
        try:
            hash = enclosure.hash
            # Split on white space, then ':' to get tuples to feed into dict
            hashdict = dict([s.partition(':')[::2] for s in hash.split()])
            # Set SHA-512 sum
            datafile.sha512sum = hashdict['sha-512']
        except AttributeError:
            pass
        datafile.save()
        url = enclosure.href
        # This means we will allow the atom feed to feed us any enclosure
        # URL that matches a registered location.  Maybe we should restrict
        # this to a specific location.
        location = Location.get_location_for_url(url)
        if not location:
            logger.error('Rejected ingestion for unknown location %s' % url)
            return

        replica = Replica(datafile=datafile, url=url,
                          location=location)
        replica.protocol = enclosure.href.partition('://')[0]
        replica.save()
        self.make_local_copy(replica)
示例#3
0
def _infer_location(path):
    if urlparse.urlparse(path).scheme == '':
        loc = Location.get_default_location()
    else:
        loc = Location.get_location_for_url(path)
    if loc:
        return loc
    else:
        raise Exception('Cannot infer a location for %s' % path)
示例#4
0
def _infer_location(path):
    if urlparse.urlparse(path).scheme == '':
        loc = Location.get_default_location()
    else:
        loc = Location.get_location_for_url(path)
    if loc:
        return loc
    else:
        raise Exception('Cannot infer a location for %s' % path)
示例#5
0
 def _build_datafile(self,
                     testfile,
                     filename,
                     dataset,
                     url,
                     protocol='',
                     checksum=None,
                     size=None,
                     mimetype=''):
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(
         dataset=dataset,
         filename=filename,
         mimetype=mimetype,
         size=str(size if size != None else filesize),
         sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
             location = Location.load_location({
                 'name':
                 filename,
                 'url':
                 urlparse.urljoin(url, '.'),
                 'type':
                 'external',
                 'priority':
                 10,
                 'transfer_provider':
                 'local'
             })
     replica = Replica(datafile=datafile,
                       protocol=protocol,
                       url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)
示例#6
0
 def _build_datafile(self, testfile, filename, dataset, url, 
                     protocol='', checksum=None, size=None, mimetype=''):
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = Dataset_File(dataset=dataset, filename=filename,
                             mimetype=mimetype,
                             size=str(size if size != None else filesize), 
                             sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     if urlparse.urlparse(url).scheme == '':
         location = Location.get_location('local')
     else:
         location = Location.get_location_for_url(url)
         if not location:
             location = Location.load_location({
                 'name': filename, 'url': urlparse.urljoin(url, '.'), 
                 'type': 'external', 
                 'priority': 10, 'transfer_provider': 'local'})
     replica = Replica(datafile=datafile, protocol=protocol, url=url,
                       location=location)
     replica.verify()
     replica.save()
     return Dataset_File.objects.get(pk=datafile.pk)