def process_enclosure(self, dataset, enclosure):
    """Create a Dataset_File and Replica for one feed enclosure.

    The datafile's name is the enclosure's ``title`` if it has one,
    otherwise the basename of its ``href``.  The ``mime``, ``length`` and
    ``hash`` attributes are optional: each is copied onto the datafile
    only when the enclosure provides it.

    The enclosure URL must match a registered Location; if it does not,
    an error is logged and no Replica is created.  On success the Replica
    is saved and handed to ``self.make_local_copy``.

    :param dataset: dataset the new datafile is attached to.
    :param enclosure: feed enclosure object; must provide ``href``.
    :returns: None.
    """
    url = enclosure.href
    filename = getattr(enclosure, 'title', basename(url))
    datafile = Dataset_File(filename=filename, dataset=dataset)

    # Optional metadata: a missing attribute simply leaves the field unset.
    try:
        datafile.mimetype = enclosure.mime
    except AttributeError:
        pass
    try:
        datafile.size = enclosure.length
    except AttributeError:
        pass
    try:
        # The hash attribute is a whitespace-separated list of
        # "algorithm:digest" items; split each on the first ':' to build
        # an algorithm -> digest mapping.
        digests = dict(item.partition(':')[::2]
                       for item in enclosure.hash.split())
        datafile.sha512sum = digests['sha-512']
    except (AttributeError, KeyError):
        # No hash attribute at all, or a hash list without a SHA-512
        # entry: leave the checksum unset rather than abort ingestion.
        pass
    datafile.save()

    # This means we will allow the atom feed to feed us any enclosure
    # URL that matches a registered location.  Maybe we should restrict
    # this to a specific location.
    location = Location.get_location_for_url(url)
    if not location:
        # NOTE(review): the datafile has already been saved at this point,
        # so a rejected enclosure leaves a Dataset_File without a Replica
        # -- confirm whether that is intended.
        logger.error('Rejected ingestion for unknown location %s', url)
        return

    replica = Replica(datafile=datafile, url=url, location=location)
    # The protocol is the URL scheme, i.e. everything before '://'.
    replica.protocol = url.partition('://')[0]
    replica.save()
    self.make_local_copy(replica)
def _infer_location(path):
    """Return the Location that *path* belongs to.

    A path with no URL scheme is mapped to the default location; anything
    else is looked up by URL.

    :param path: a plain path or a URL string.
    :returns: the matching Location.
    :raises Exception: if no location could be determined.
    """
    has_scheme = urlparse.urlparse(path).scheme != ''
    if has_scheme:
        location = Location.get_location_for_url(path)
    else:
        location = Location.get_default_location()

    if not location:
        raise Exception('Cannot infer a location for %s' % path)
    return location
def _build_datafile(self, testfile, filename, dataset, url,
                    protocol='', checksum=None, size=None, mimetype=''):
    """Create and save a Dataset_File with a single Replica.

    The size and SHA-512 sum obtained from ``get_size_and_sha512sum`` for
    *testfile* are used unless the caller overrides them through *size*
    and *checksum*.

    :param testfile: file passed to ``get_size_and_sha512sum``.
    :param filename: name stored on the datafile; also used as the name
        of any location created on the fly.
    :param dataset: dataset the datafile is attached to.
    :param url: replica URL; a value without a scheme is treated as local.
    :param protocol: protocol stored on the replica.
    :param checksum: overrides the computed SHA-512 sum when truthy.
    :param size: overrides the computed size when not None (0 is kept).
    :param mimetype: MIME type stored on the datafile.
    :returns: the Dataset_File, freshly re-read from the database.
    """
    filesize, sha512sum = get_size_and_sha512sum(testfile)
    datafile = Dataset_File(
        dataset=dataset,
        filename=filename,
        mimetype=mimetype,
        # Identity test so that an explicit size of 0 is respected.
        size=str(size if size is not None else filesize),
        sha512sum=(checksum if checksum else sha512sum))
    datafile.save()

    if urlparse.urlparse(url).scheme == '':
        location = Location.get_location('local')
    else:
        location = Location.get_location_for_url(url)
        if not location:
            # No registered location matches this URL: load an external
            # one whose base URL is the URL's parent directory.
            location = Location.load_location({
                'name': filename,
                'url': urlparse.urljoin(url, '.'),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'
            })

    replica = Replica(datafile=datafile, protocol=protocol, url=url,
                      location=location)
    replica.verify()
    replica.save()
    # Re-fetch so the caller sees the state stored in the database.
    return Dataset_File.objects.get(pk=datafile.pk)
def _build_datafile(self, testfile, filename, dataset, url,
                    protocol='', checksum=None, size=None, mimetype=''):
    """Build, verify and persist a Dataset_File and its Replica.

    ``get_size_and_sha512sum(testfile)`` supplies the default size and
    SHA-512 sum; *size* and *checksum* replace those defaults when given.

    :param testfile: file passed to ``get_size_and_sha512sum``.
    :param filename: datafile name, reused as the name of a location
        created for an unregistered URL.
    :param dataset: owning dataset.
    :param url: replica URL; scheme-less values use the 'local' location.
    :param protocol: protocol recorded on the replica.
    :param checksum: SHA-512 sum to store instead of the computed one
        (ignored when falsy).
    :param size: size to store instead of the computed one (used whenever
        it is not None).
    :param mimetype: MIME type recorded on the datafile.
    :returns: the saved Dataset_File, reloaded by primary key.
    """
    filesize, sha512sum = get_size_and_sha512sum(testfile)
    # Compare against None by identity; only a missing size falls back to
    # the computed one.
    stored_size = size if size is not None else filesize
    stored_checksum = checksum if checksum else sha512sum
    datafile = Dataset_File(dataset=dataset, filename=filename,
                            mimetype=mimetype,
                            size=str(stored_size),
                            sha512sum=stored_checksum)
    datafile.save()

    if urlparse.urlparse(url).scheme == '':
        location = Location.get_location('local')
    else:
        location = Location.get_location_for_url(url)
        if not location:
            # Unregistered URL: load an external location rooted at the
            # URL's containing directory.
            location = Location.load_location({
                'name': filename,
                'url': urlparse.urljoin(url, '.'),
                'type': 'external',
                'priority': 10,
                'transfer_provider': 'local'})

    replica = Replica(datafile=datafile, protocol=protocol, url=url,
                      location=location)
    replica.verify()
    replica.save()
    # Return a fresh copy read back from the database.
    return Dataset_File.objects.get(pk=datafile.pk)