def _get_default_conf_dir():
    """Return the default Hadoop configuration directory.

    Uses the first entry of the detected Hadoop classpath when one is
    available; otherwise falls back to the conventional "/etc/hadoop/conf".
    """
    from graphlab import sys_util

    classpath = sys_util.get_hadoop_class_path()
    # An empty classpath means Hadoop was not found on this machine.
    if not classpath:
        return "/etc/hadoop/conf"
    return classpath.split(':')[0]
def _make_internal_url(url): """ Takes a user input url string and translates into url relative to the server process. - URL to a local location begins with "local://" or has no "*://" modifier. If the server is local, returns the absolute path of the url. For example: "local:///tmp/foo" -> "/tmp/foo" and "./foo" -> os.path.abspath("./foo"). If the server is not local, raise NotImplementedError. - URL to a server location begins with "remote://". Returns the absolute path after the "remote://" modifier. For example: "remote:///tmp/foo" -> "/tmp/foo". - URL to a s3 location begins with "s3://": Returns the s3 URL with credentials filled in using graphlab.aws.get_aws_credential(). For example: "s3://mybucket/foo" -> "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo". - URL to other remote locations, e.g. http://, will remain as is. - Expands ~ to $HOME Parameters ---------- string A URL (as described above). Raises ------ ValueError If a bad url is provided. """ if not url: raise ValueError('Invalid url: %s' % url) # The final file path on server. path_on_server = None # Try to split the url into (protocol, path). urlsplit = url.split("://") if len(urlsplit) == 2: protocol, path = urlsplit if not path: raise ValueError('Invalid url: %s' % url) if protocol in ['http', 'https']: # protocol is a remote url not on server, just return return url elif protocol == 'hdfs': if isinstance(_glconnect.get_server(), _server.LocalServer ) and not _sys_util.get_hadoop_class_path(): raise ValueError( "HDFS URL is not supported because Hadoop not found. Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again." 
) else: return url elif protocol == 's3': return _try_inject_s3_credentials(url) elif protocol == 'remote': # url for files on the server path_on_server = path elif protocol == 'local': # url for files on local client, check if we are connecting to local server if (isinstance(_glconnect.get_server(), _server.LocalServer)): path_on_server = path else: raise ValueError( 'Cannot use local URL when connecting to a remote server.') else: raise ValueError( 'Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol) elif len(urlsplit) == 1: # expand ~ to $HOME url = _os.path.expanduser(url) # url for files on local client, check if we are connecting to local server if (isinstance(_glconnect.get_server(), _server.LocalServer)): path_on_server = url else: raise ValueError( 'Cannot use local URL when connecting to a remote server.') else: raise ValueError('Invalid url: %s' % url) if path_on_server: return _os.path.abspath(_os.path.expanduser(path_on_server)) else: raise ValueError('Invalid url: %s' % url)
def setUpClass(self):
    # One-time fixture setup: record whether Hadoop is on this machine,
    # pick a scratch file name, and build the shared test objects.
    hadoop_classpath = _sys_util.get_hadoop_class_path()
    self.has_hdfs = len(hadoop_classpath) > 0
    # NOTE(review): taking only .name lets the NamedTemporaryFile object be
    # collected (and the file deleted); the tests presumably just want a
    # fresh path — confirm.
    self.tempfile = tempfile.NamedTemporaryFile().name
    self.graph, self.sframe, self.model = create_test_objects()
def _make_internal_url(url): """ Takes a user input url string and translates into url relative to the server process. - URL to a local location begins with "local://" or has no "*://" modifier. If the server is local, returns the absolute path of the url. For example: "local:///tmp/foo" -> "/tmp/foo" and "./foo" -> os.path.abspath("./foo"). If the server is not local, raise NotImplementedError. - URL to a server location begins with "remote://". Returns the absolute path after the "remote://" modifier. For example: "remote:///tmp/foo" -> "/tmp/foo". - URL to a s3 location begins with "s3://": Returns the s3 URL with credentials filled in using graphlab.aws.get_aws_credential(). For example: "s3://mybucket/foo" -> "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo". - URL to other remote locations, e.g. http://, will remain as is. - Expands ~ to $HOME Parameters ---------- string A URL (as described above). Raises ------ ValueError If a bad url is provided. """ if not url: raise ValueError('Invalid url: %s' % url) # The final file path on server. path_on_server = None # Try to split the url into (protocol, path). urlsplit = url.split("://") if len(urlsplit) == 2: protocol, path = urlsplit if not path: raise ValueError('Invalid url: %s' % url) if protocol in ['http', 'https']: # protocol is a remote url not on server, just return return url elif protocol == 'hdfs': if isinstance(_glconnect.get_server(), _server.LocalServer) and not _sys_util.get_hadoop_class_path(): raise ValueError("HDFS URL is not supported because Hadoop not found. 
Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again.") else: return url elif protocol == 's3': return _try_inject_s3_credentials(url) elif protocol == 'remote': # url for files on the server path_on_server = path elif protocol == 'local': # url for files on local client, check if we are connecting to local server if (isinstance(_glconnect.get_server(), _server.LocalServer)): path_on_server = path else: raise ValueError('Cannot use local URL when connecting to a remote server.') else: raise ValueError('Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol) elif len(urlsplit) == 1: # expand ~ to $HOME url = _os.path.expanduser(url) # url for files on local client, check if we are connecting to local server if (isinstance(_glconnect.get_server(), _server.LocalServer)): path_on_server = url else: raise ValueError('Cannot use local URL when connecting to a remote server.') else: raise ValueError('Invalid url: %s' % url) if path_on_server: return _os.path.abspath(_os.path.expanduser(path_on_server)) else: raise ValueError('Invalid url: %s' % url)