示例#1
0
def get_ref_for_url(url, version, task_id):
    """
    Returns a SW2_ConcreteReference for the data stored at the given URL.

    A URL with the 'swbs' scheme is referenced directly: its path (minus
    the first character) becomes the reference id and its netloc is added
    as a location hint. Any other URL is downloaded to a named temporary
    file while its SHA-1 digest is computed, and the file is then handed
    to ref_from_external_file under the id 'urlfetch:<sha1 hex digest>'.

    Currently, the version is ignored, but we imagine using this for e.g.
    HTTP ETags, which would raise an error if the data changed. The
    task_id argument is not used by this function either.

    Errors raised while opening or reading the URL, or while writing the
    temporary file, propagate to the caller; in that case the partially
    written temporary file is removed first so it is not leaked.
    """
    # Imported locally so the cleanup below does not depend on a
    # module-level import of os.
    import os

    parsed_url = urlparse.urlparse(url)
    if parsed_url.scheme == 'swbs':
        # URL is in a Skywriting Block Store, so we can make a reference
        # for it directly.
        ref_id = parsed_url.path[1:]
        ref = SW2_ConcreteReference(ref_id, None)
        ref.add_location_hint(parsed_url.netloc)
    else:
        # URL is outside the cluster, so we have to fetch it. We use
        # content-based addressing to name the fetched data.
        digest = hashlib.sha1()
        fetch_filename = None
        fetched = False

        try:
            # 1. Fetch URL to a file-like object.
            with contextlib.closing(urllib2.urlopen(url)) as url_file:

                # 2. Hash its contents and write it to disk. The file is
                #    created with delete=False because it must outlive
                #    this 'with' block.
                with tempfile.NamedTemporaryFile('wb', 4096,
                                                 delete=False) as fetch_file:
                    fetch_filename = fetch_file.name
                    while True:
                        chunk = url_file.read(4096)
                        if not chunk:
                            break
                        digest.update(chunk)
                        fetch_file.write(chunk)
            fetched = True
        finally:
            # A failed download would otherwise leave a partial file
            # behind, since nothing else knows its name. Both 'with'
            # blocks have exited here, so the file is already closed.
            if not fetched and fetch_filename is not None:
                try:
                    os.remove(fetch_filename)
                except OSError:
                    # Best-effort cleanup; let the original error surface.
                    pass

        # 3. Store the fetched file in the block store, named by the
        #    content hash.
        # NOTE(review): presumably ref_from_external_file takes ownership
        # of the temporary file -- confirm against its definition.
        ref_id = 'urlfetch:%s' % digest.hexdigest()
        ref = ref_from_external_file(fetch_filename, ref_id)

    return ref
示例#2
0
def get_ref_for_url(url, version, task_id):
    """
    Returns a SW2_ConcreteReference for the data stored at the given URL.

    For a "swbs" URL the reference is built directly from the URL: the
    path with its first character dropped is the reference id, and the
    netloc is recorded as a location hint. For every other scheme the
    URL is fetched into a named temporary file, hashed with SHA-1 as it
    is written, and passed to ref_from_external_file with the id
    "urlfetch:<sha1 hex digest>".

    Currently, the version is ignored, but we imagine using this for e.g.
    HTTP ETags, which would raise an error if the data changed. The
    task_id argument is likewise unused here.

    Any error from opening/reading the URL or writing the temporary file
    is propagated; the incomplete temporary file is deleted before the
    error reaches the caller.
    """
    # Function-scope import: keeps this block self-contained regardless
    # of what the module imports at the top.
    import os

    parsed_url = urlparse.urlparse(url)
    if parsed_url.scheme == "swbs":
        # URL is in a Skywriting Block Store, so we can make a reference
        # for it directly.
        block_id = parsed_url.path[1:]
        ref = SW2_ConcreteReference(block_id, None)
        ref.add_location_hint(parsed_url.netloc)
    else:
        # URL is outside the cluster, so we have to fetch it. We use
        # content-based addressing to name the fetched data.
        content_hash = hashlib.sha1()
        fetch_filename = None
        download_complete = False

        try:
            # 1. Fetch URL to a file-like object.
            with contextlib.closing(urllib2.urlopen(url)) as url_file:

                # 2. Hash its contents and write it to disk. delete=False
                #    keeps the file around after the "with" block exits.
                with tempfile.NamedTemporaryFile(
                    "wb", 4096, delete=False
                ) as fetch_file:
                    fetch_filename = fetch_file.name
                    while True:
                        chunk = url_file.read(4096)
                        if not chunk:
                            break
                        content_hash.update(chunk)
                        fetch_file.write(chunk)
            download_complete = True
        finally:
            # On failure the half-written file would be orphaned on disk,
            # so remove it. The file has been closed by this point.
            if not download_complete and fetch_filename is not None:
                try:
                    os.remove(fetch_filename)
                except OSError:
                    # Cleanup is best-effort; do not mask the real error.
                    pass

        # 3. Store the fetched file in the block store, named by the
        #    content hash.
        # NOTE(review): assumes ref_from_external_file becomes responsible
        # for the temporary file -- verify against its implementation.
        block_id = "urlfetch:%s" % content_hash.hexdigest()
        ref = ref_from_external_file(fetch_filename, block_id)

    return ref
示例#3
0
 def to_ref(self, refid):
     """
     Build a reference named refid from this object's contents.

     If self.str is set (not None) the reference is created from that
     string via ref_from_string; otherwise it is created from the file
     named by self.filename via ref_from_external_file.
     """
     if self.str is None:
         # No in-memory string, so fall back to the file on disk.
         return ref_from_external_file(self.filename, refid)
     return ref_from_string(self.str, refid)
示例#4
0
 def to_ref(self, refid):
     """
     Return a reference with id refid for the data held by this object.

     The in-memory string self.str is used (through ref_from_string)
     whenever it is not None; in every other case the data is taken from
     the file self.filename (through ref_from_external_file).
     """
     return (
         ref_from_string(self.str, refid)
         if self.str is not None
         else ref_from_external_file(self.filename, refid)
     )