def _from_URL_fileopen(target_url):
    """Download a remote file and return it as an open temporary file.

    Dropbox share links (host ``www.dropbox.com`` with query ``dl=0``) and
    Google Drive links (host ``drive.google.com``) are rewritten into their
    direct-download form before the request is made.

    Args:
        target_url: URL string using the ``http``, ``https`` or ``ftp``
            scheme.

    Returns:
        A ``tempfile.TemporaryFile`` holding the downloaded bytes, rewound
        to offset 0. The caller is responsible for closing it.

    Raises:
        ValueError: If the scheme is not http/https/ftp, or if a Google
            Drive link has no file id in its path.
    """
    import shutil
    import tempfile
    from contextlib import closing

    parsed = urlparse(target_url)
    scheme = parsed.scheme
    net_location = parsed.netloc
    path = parsed.path

    # Only remote schemes are supported; anything else is not a URL we can
    # fetch (e.g. a plain local path parses with an empty scheme).
    if scheme not in ("http", "https", "ftp"):
        raise ValueError("Cannot open url: %s" % (target_url,))

    if net_location == 'www.dropbox.com':
        # "dl=0" serves an HTML preview page; "dl=1" serves the file itself.
        # Links carrying any other query are left untouched.
        if parsed.query == "dl=0":
            target_url = urlunparse(
                (scheme, net_location, path, parsed.params, "dl=1", ""))
    elif net_location == 'drive.google.com':
        # Share links look like /file/d/<id>/..., so the id is the fourth
        # element once the path is split on "/".
        path_parts = path.split('/')
        if len(path_parts) < 4 or not path_parts[3]:
            raise ValueError(
                "Cannot find Google Drive file id in url: %s" % (target_url,))
        # Direct-download endpoint instead of the HTML viewer frame.
        target_url = ("https://docs.google.com/uc?export=download&id="
                      + path_parts[3])

    # closing() guarantees the network response is released, and works for
    # response objects whether or not they are context managers themselves.
    with closing(urlopen(Request(target_url))) as response:
        temp = tempfile.TemporaryFile()
        try:
            shutil.copyfileobj(response, temp)
            temp.seek(0)
        except Exception:
            # Do not leak the temporary file when the download fails midway.
            temp.close()
            raise
    return temp
def _from_URL_fileopen(target_url):
    """Download a remote file and return it as an open temporary file.

    Dropbox share links (host ``www.dropbox.com`` with query ``dl=0``) and
    Google Drive links (host ``drive.google.com``) are rewritten into their
    direct-download form before the request is made.

    Args:
        target_url: URL string using the ``http``, ``https`` or ``ftp``
            scheme.

    Returns:
        A ``tempfile.TemporaryFile`` holding the downloaded bytes, rewound
        to offset 0. The caller is responsible for closing it.

    Raises:
        ValueError: If the scheme is not http/https/ftp, or if a Google
            Drive link has no file id in its path.
    """
    import shutil
    import tempfile
    from contextlib import closing

    parsed = urlparse(target_url)
    scheme = parsed.scheme
    net_location = parsed.netloc
    path = parsed.path

    # Only remote schemes are supported; anything else is not a URL we can
    # fetch (e.g. a plain local path parses with an empty scheme).
    if scheme not in ("http", "https", "ftp"):
        raise ValueError("Cannot open url: %s" % (target_url,))

    if net_location == 'www.dropbox.com':
        # "dl=0" serves an HTML preview page; "dl=1" serves the file itself.
        # Links carrying any other query are left untouched.
        if parsed.query == "dl=0":
            target_url = urlunparse(
                (scheme, net_location, path, parsed.params, "dl=1", ""))
    elif net_location == 'drive.google.com':
        # Share links look like /file/d/<id>/..., so the id is the fourth
        # element once the path is split on "/".
        path_parts = path.split('/')
        if len(path_parts) < 4 or not path_parts[3]:
            raise ValueError(
                "Cannot find Google Drive file id in url: %s" % (target_url,))
        # Direct-download endpoint instead of the HTML viewer frame.
        target_url = ("https://docs.google.com/uc?export=download&id="
                      + path_parts[3])

    # closing() guarantees the network response is released, and works for
    # response objects whether or not they are context managers themselves.
    with closing(urlopen(Request(target_url))) as response:
        temp = tempfile.TemporaryFile()
        try:
            shutil.copyfileobj(response, temp)
            temp.seek(0)
        except Exception:
            # Do not leak the temporary file when the download fails midway.
            temp.close()
            raise
    return temp
def data_stream(self, identifier):
    """Return the open URL response for the data behind *identifier*.

    The location is obtained from ``self.data_url(identifier)`` and handed
    straight to ``urlopen``; the resulting object is returned unread, so the
    caller is responsible for reading and closing it.
    """
    return urlopen(self.data_url(identifier))