'''
Copyright (C) 2015 Dato, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public
License along with this program. If not, see
<http://www.gnu.org/licenses/>.
'''
from graphlab.connect.server import _get_hadoop_class_path

if __name__ == '__main__':
    # Resolve the Hadoop classpath once and persist it as a shell
    # "export" line so other processes can source classpath.sh.
    classpath = _get_hadoop_class_path()
    with open('classpath.sh', 'w') as f:
        f.write('export CLASSPATH=%s\n' % classpath)
def make_internal_url(url):
    """
    Takes a user input url string and translates into url relative to the
    server process.

    - URL to a local location begins with "local://" or has no "*://"
      modifier. If the server is local, returns the absolute path of the url.
      For example: "local:///tmp/foo" -> "/tmp/foo" and
      "./foo" -> os.path.abspath("./foo"). If the server is not local, raise
      NotImplementedError.
    - URL to a server location begins with "remote://". Returns the absolute
      path after the "remote://" modifier. For example:
      "remote:///tmp/foo" -> "/tmp/foo".
    - URL to a s3 location begins with "s3://": Returns the s3 URL with
      credentials filled in using graphlab.aws.get_aws_credential(). For
      example: "s3://mybucket/foo" ->
      "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo".
    - URL to other remote locations, e.g. http://, will remain as is.
    - Expands ~ to $HOME

    Parameters
    ----------
    string
        A URL (as described above).

    Raises
    ------
    ValueError
        If a bad url is provided.
    """
    if not url:
        raise ValueError('Invalid url: %s' % url)

    # Final file path relative to the server process, filled in below for
    # the "remote://", "local://" and bare-path cases.
    server_path = None

    pieces = url.split("://")
    if len(pieces) == 2:
        protocol, path = pieces
        if not path:
            raise ValueError('Invalid url: %s' % url)

        if protocol in ('http', 'https'):
            # Remote web URL, not on the server — pass through untouched.
            return url

        if protocol == 'hdfs':
            # HDFS only works against a local server when Hadoop is on the
            # classpath; otherwise reject early with a helpful message.
            if isinstance(_glconnect.get_server(), _server.LocalServer) and not _server._get_hadoop_class_path():
                raise ValueError("HDFS URL is not supported because Hadoop not found. Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again.")
            return url

        if protocol == 's3':
            if len(path.split(":")) == 3:
                # Credentials (key:secret:bucket/...) already embedded.
                return url
            # No embedded credentials — fill them in from the environment.
            (k, v) = _get_aws_credentials()
            return 's3://' + k + ':' + v + ':' + path

        if protocol == 'remote':
            # Path is interpreted on the server side.
            server_path = path
        elif protocol == 'local':
            # Local paths are only meaningful against a local server.
            if isinstance(_glconnect.get_server(), _server.LocalServer):
                server_path = path
            else:
                raise ValueError('Cannot use local URL when connecting to a remote server.')
        else:
            raise ValueError('Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol)
    elif len(pieces) == 1:
        # Bare path with no "*://" modifier: expand ~ and treat as local.
        expanded = _os.path.expanduser(url)
        if not isinstance(_glconnect.get_server(), _server.LocalServer):
            raise ValueError('Cannot use local URL when connecting to a remote server.')
        server_path = expanded
    else:
        raise ValueError('Invalid url: %s' % url)

    if not server_path:
        raise ValueError('Invalid url: %s' % url)
    return _os.path.abspath(_os.path.expanduser(server_path))