def get_download_and_cache():
    """Download the file into the cache under a write lock, then delegate to
    `get_cached()` to place it at the output path.

    NOTE(review): this is a nested closure — `self`, `hash`, `project_relpath`,
    `cache_path`, and `get_cached` are captured from the enclosing scope.
    """
    with util.FileWriteLock(cache_path):
        self.download_file_direct(hash, project_relpath, cache_path)
        # Make cache file read-only.
        util.subshell(['chmod', '-w', cache_path])
    # Use cached file - `get_download()` has already checked the hash.
    get_cached(skip_sha_check=True)
def upload_file(self, hash, project_relpath, filepath):
    """Copy `filepath` into the upload directory, keyed by `hash`, and record
    the destination in the hash map.

    Preconditions (asserted): `hash` is of the expected type, has not been
    uploaded before, and its destination path does not already exist.
    """
    self._check_hash_type(hash)
    assert hash not in self._map
    target = os.path.join(self._upload_dir, hash.get_value())
    assert not os.path.exists(target)
    # Create the parent directory on first use.
    parent = os.path.dirname(target)
    if not os.path.isdir(parent):
        os.makedirs(parent)
    # Place the payload, then remember where it lives.
    util.subshell(['cp', filepath, target])
    self._map[hash] = target
def _check_hash(url, hash_expected):
    """Download the content at `url` into a scratch file and return True iff
    its hash equals `hash_expected`; print a warning on mismatch.
    """
    # TODO(eric.cousineau): It'd be nice to cache the downloaded file, if it's ever
    # useful.
    scratch = util.TmpFileName()
    with scratch:
        scratch_path = scratch.get_path()
        util.subshell('curl -s -o {} {}'.format(scratch_path, url))
        # Compute while the temp file still exists.
        hash_actual = hash_expected.compute(scratch_path)
    if hash_expected == hash_actual:
        return True
    util.eprint("WARNING: Hash mismatch for url: {}".format(url))
    util.eprint(" expected:\n {}".format(hash_expected))
    util.eprint(" url:\n {}".format(hash_actual))
    return False
def action(api_url, endpoint_in, query=None, token=None, args=None, method="GET"):
    """Lightweight REST call via `curl`.

    @param api_url Base URL of the API server.
    @param endpoint_in Endpoint path; combined with `query` via `format_qs`.
    @param query Optional query parameters.
    @param token Optional Girder auth token, sent as a `Girder-Token` header.
    @param args Extra `curl` arguments (list). Defaults to none.
    @param method HTTP method; non-GET requests get an empty `-d` body.
    @return Parsed JSON response.
    @raises RuntimeError if the HTTP status code is >= 400.
    """
    # Fix: use a None sentinel rather than a mutable default argument.
    if args is None:
        args = []
    extra_args = []
    if token:
        extra_args += ["--header", "Girder-Token: {}".format(token)]
    if method != "GET":
        # https://serverfault.com/a/315852/443276
        extra_args += ["-d", ""]
    endpoint = format_qs(endpoint_in, query)
    # `--write-out` appends the status code on its own line so we can split
    # the body from the code below.
    response_full = util.subshell([
        "curl",
        "-X", method,
        "-s",
        "--write-out", "\n%{http_code}",
        "--header", "Accept: application/json",
    ] + args + extra_args + ["{}{}".format(api_url, endpoint)])
    lines = response_full.splitlines()
    response = "\n".join(lines[0:-1])
    code = int(lines[-1])
    if code >= 400:
        raise RuntimeError("Bad response for: {}\n {}".format(
            endpoint, response))
    return json.loads(response)
def test_files(self):
    """Check that every downloaded `*.bin` file under `data` matches the
    contents of its corresponding mock file.

    Fixes over the original: files are closed via `with`, the Python-2-only
    `iteritems()` becomes `items()`, deprecated `assertEquals` becomes
    `assertEqual`, empty entries from `split('\n')` are skipped (the original
    would call `open('')` when `find` printed a trailing newline or nothing),
    and the builtin name `file` is no longer shadowed.
    """
    files = subshell("find data -name '*.bin'")
    for file_path in files.split('\n'):
        if not file_path:
            # Guard against trailing-newline / empty `find` output.
            continue
        with open(file_path) as f:
            contents = f.read()
        file_name = os.path.basename(file_path)
        # Locate the mock file that should back this data file.
        mock_contents = None
        for mock_name, mock_file_names in expected_files.items():
            if file_name in mock_file_names:
                mock_file = os.path.join(mock_dir, mock_name, file_name)
                with open(mock_file) as f:
                    mock_contents = f.read()
                break
        # A file with no matching mock fails (compares against None).
        self.assertEqual(contents, mock_contents)
def _has_file(url, hash, trusted=False):
    """Return True if `url` serves the expected content.

    @param trusted If False, download the full file and verify its hash.
        If True, only issue a HEAD request and trust a 2xx/3xx status.
    @raises RuntimeError on an unrecognized response line or a 1xx status.
    """
    if not trusted:
        # Download the full file, and check the hash.
        return _check_hash(url, hash)
    else:
        # Just check header.
        first_line = util.subshell(
            'curl -s --head {url} | head -n 1'.format(url=url))
        # Fix: accept any HTTP version (e.g. "HTTP/2") and a possibly absent
        # reason phrase; the original pattern required "HTTP/1.1 <code> <text>"
        # and raised AttributeError (match is None) otherwise.
        match = re.match(r"^HTTP/\S+ (\d+)", first_line)
        if match is None:
            raise RuntimeError("Unknown response: {}".format(first_line))
        code = int(match.group(1))
        if code >= 400:
            return False
        elif code >= 200:
            return True
        else:
            raise RuntimeError("Unknown response: {}".format(first_line))
def has_file(self, hash, project_relpath):
    """ Returns true if the given hash exists on the given server. """
    # TODO(eric.cousineau): Is there a quicker way to do this???
    # TODO(eric.cousineau): Check `folder_id` and ensure it lives in the same place?
    # This is necessary if we have users with the same file?
    # What about authentication? Optional authentication / public access?
    args = self._download_args(hash)
    first_line = util.subshell(
        'curl -s --head {args} | head -n 1'.format(args=args))
    # Fix: accept any HTTP version (e.g. "HTTP/2") and a possibly absent
    # reason phrase; the original pattern required "HTTP/1.1 <code> <text>"
    # and raised AttributeError (match is None) otherwise.
    match = re.match(r"^HTTP/\S+ (\d+)", first_line)
    if match is None:
        raise RuntimeError("Unknown response: {}".format(first_line))
    code = int(match.group(1))
    if code >= 400:
        return False
    elif code >= 200:
        return True
    else:
        raise RuntimeError("Unknown response: {}".format(first_line))
def get_cached(skip_sha_check=False):
    """Copy (or symlink) the cached file to the output path, verifying its
    hash; on mismatch, purge the corrupt cache entry and re-download.

    NOTE(review): nested closure — `symlink`, `cache_path`, `output_file`,
    `hash`, and `get_download_and_cache` are captured from the enclosing
    scope.

    @param skip_sha_check If True, trust the cache without re-hashing (used
        when the caller has just verified the download).
    """
    # Can use cache. Copy to output path.
    if symlink:
        util.subshell(['ln', '-s', cache_path, output_file])
    else:
        util.subshell(['cp', cache_path, output_file])
        util.subshell(['chmod', '+w', output_file])
    # On error, remove cached file, and re-download.
    if not skip_sha_check:
        if not hash.check_file(output_file, do_throw=False):
            util.eprint("SHA-512 mismatch. Removing old cached file, re-downloading.")
            # `os.remove()` will remove read-only files without prompting.
            os.remove(cache_path)
            if os.path.islink(output_file):
                # In this situation, the cache was corrupted (somehow), and Bazel
                # triggered a recompilation, and we still have a symlink in Bazel-space.
                # Remove this symlink, so that we do not download into a symlink (which
                # complicates the logic in `get_download_and_cache`). This also allows
                # us to "reset" permissions.
                os.remove(output_file)
            get_download_and_cache()
def download_file(self, hash, project_relpath, output_file):
    """Copy the file previously recorded for `hash` to `output_file`.

    @raises util.DownloadError if no file is recorded for `hash`.
    """
    self._check_hash_type(hash)
    source = self._map.get(hash)
    if source is None:
        raise util.DownloadError("Unknown hash: {}".format(hash))
    util.subshell(['cp', source, output_file])
def do_compute(self, filepath):
    """Return the SHA-512 hex digest of the file at `filepath`.

    Fix: use stdlib `hashlib` instead of shelling out to `sha512sum`, which
    avoids spawning a subprocess, works on platforms without that utility
    (e.g. macOS), and removes the fragile output parsing. The returned hex
    string is identical to the first field of `sha512sum` output.
    """
    import hashlib
    digest = hashlib.sha512()
    with open(filepath, 'rb') as f:
        # Read in 1 MiB chunks so large files are not loaded into memory.
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()